Add some common synonyms for unicode word searching

Fixes #1133
This commit is contained in:
Kovid Goyal 2018-11-13 15:46:33 +05:30
parent 718111582a
commit 3e26f96744
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
3 changed files with 15182 additions and 15177 deletions

View File

@@ -87,6 +87,11 @@ def parse_ucd():
if category.startswith('M'):
marks.add(codepoint)
# Some common synonyms
word_search_map['bee'] |= word_search_map['honeybee']
word_search_map['lambda'] |= word_search_map['lamda']
word_search_map['lamda'] |= word_search_map['lambda']
def split_two(line):
spec, rest = line.split(';', 1)
@@ -360,16 +365,15 @@ def gen_names():
root = TrieNode()
all_trie_nodes.append(root)
def add_word(word_idx):
word = word_map[word_idx]
def add_word(word_idx, word):
parent = root
for letter in map(ord, word):
idx = parent.add_letter(letter)
parent = all_trie_nodes[idx]
parent.match_offset = offsets_array[word_idx]
for i in range(len(word_map)):
add_word(i)
for i, word in enumerate(word_map):
add_word(i, word)
children_array = [0]
for node in all_trie_nodes:
if node.children:

File diff suppressed because one or more lines are too long

View File

@@ -20,3 +20,4 @@ class TestUnicodeInput(BaseTest):
self.ae(matches('horiz', 'ell'), {0x2026, 0x22ef, 0x2b2c, 0x2b2d, 0xfe19})
self.ae(matches('horizontal', 'ell'), {0x2026, 0x22ef, 0x2b2c, 0x2b2d, 0xfe19})
self.assertFalse(matches('sfgsfgsfgfgsdg'))
self.assertIn(0x1f41d, matches('bee'))