Skip to content

Commit

Permalink
Merge pull request #8 from IQuick143/master
Browse files Browse the repository at this point in the history
Fixed Issue #7 and fixed grammar
  • Loading branch information
ZhukovAlexander authored May 15, 2019
2 parents 8357be4 + 3adc978 commit 85a2524
Showing 1 changed file with 70 additions and 41 deletions.
111 changes: 70 additions & 41 deletions triegex/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,56 +14,80 @@

class TriegexNode:

def __init__(self, char: str, end: bool, *childrens):
def __init__(self, char: str, end: bool, *children):
self.char = char if char is not None else ''
self.end = end
self.childrens = {children.char: children for children in childrens}
self.children = {child.char: child for child in children}

def __iter__(self):
return iter(sorted(self.childrens.values(), key=lambda x: x.char))
return iter(sorted(self.children.values(), key=lambda x: x.char))

def __len__(self):
return len(self.childrens)
return len(self.children)

def __repr__(self):
return "<TriegexNode: '{0.char}' end={0.end}>".format(self)

def __contains__(self, key):
return key in self.childrens
return key in self.children

def __getitem__(self, key):
return self.childrens[key]
return self.children[key]

def __delitem__(self, key):
del self.childrens[key]
del self.children[key]

def to_regex(self):
'''
RECURSIVE IMPLEMENTATION FOR REFERENCE
suffixes = [v.to_regex() for k, v in self.children.items()]
if self.end:
suffixes += [WORD_BOUNDARY]
if len(suffixes) > 1:
return self.char + GROUP.format(OR.join(suffixes))
elif len(suffixes) == 1:
return self.char + suffixes[0]
else:
return self.char
'''

stack = [self]
ready = []
waiting = []

while stack:
waiting.append(stack.pop())
stack.extend(waiting[-1])

while waiting:
node = waiting.pop()
result = node.char

# marks starting indices of children of a node
lookup = []

# Creates an ordered list of nodes starting with root and ending with leaves by using BFS
i = 0
j = 1
while i < len(stack):
stack.extend(sorted(stack[i].children.values(), key=lambda node: node.char))
lookup.append(j)
j += len(stack[i].children)
i += 1

i = len(stack)
# temp value array
sub_regexes = [None] * i
while i > 0:
# We start with leaves and end at root thus we decrement
i -= 1
node = stack[i]
# Get regexes of child nodes and make a root regex
suffixes = [sub_regexes[child] for child in range(lookup[i], lookup[i] + len(node.children))]
if node.end:
result += WORD_BOUNDARY

# if there is only one children, we can safely concatenate chars
# withoug nesting
elif len(node) == 1:
result += ready.pop()

elif len(node) > 1:
result += GROUP.format(OR.join(ready))
ready = []

ready.append(result)
return ready[-1]
# if the node is an ending node we add a \b character
suffixes += [WORD_BOUNDARY]
# If we arrive at the root node we have to add the NOTHING expression
if i == 0:
suffixes += [NOTHING]
if len(suffixes) > 1:
sub_regexes[i] = node.char + GROUP.format(OR.join(suffixes))
elif len(suffixes) == 1:
sub_regexes[i] = node.char + suffixes[0]
else:
sub_regexes[i] = node.char
# return the top Regex
return sub_regexes[0]


class Triegex(collections.MutableSet):
Expand All @@ -72,19 +96,24 @@ def __init__(self, *words):
Trigex constructor.
"""

# make sure we match nothing when no words are added
self._root = TriegexNode(None, False, TriegexNode(NOTHING, False))
self._root = TriegexNode(None, False)

for word in words:
self.add(word)

def add(self, word: str):
current = self._root
for letter in word[:-1]:
current = current.childrens.setdefault(letter,
TriegexNode(letter, False))
if letter in current.children:
current = current.children[letter]
else:
current = current.children.setdefault(letter,
TriegexNode(letter, False))
# this will ensure that we correctly match the word boundary
current.childrens[word[-1]] = TriegexNode(word[-1], True)
if word[-1] in current.children:
current.children[word[-1]].end = True
else:
current.children[word[-1]] = TriegexNode(word[-1], True)

def to_regex(self):
r"""
Expand All @@ -103,15 +132,15 @@ def _traverse(self):
while stack:
yield current
current = stack.pop()
stack.extend(current.childrens.values())
stack.extend(current.children.values())

def __iter__(self):
paths = {self._root.char: []}
for node in self._traverse():
for children in node:
paths[children.char] = [node.char] + paths[node.char]
if children.end:
char = children.char
for child in node:
paths[child.char] = [node.char] + paths[node.char]
if child.end:
char = child.char
yield ''.join(reversed([char] + paths[char]))

def __len__(self):
Expand Down

0 comments on commit 85a2524

Please sign in to comment.