Skip to content

Commit

Permalink
Add merge phrases from explosion#523 (comment)
Browse files Browse the repository at this point in the history
  • Loading branch information
chssch committed Oct 22, 2016
1 parent 6b30cba commit cf7b6f7
Showing 1 changed file with 13 additions and 0 deletions.
13 changes: 13 additions & 0 deletions website/docs/tutorials/rule-based-matcher.jade
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,18 @@ p.u-text-large spaCy features a rule-matching engine that operates over tokens.

# Load the 'en' model with the parser and entity components turned off
# (parser=False, entity=False).
nlp = spacy.load('en', parser=False, entity=False)

def merge_phrases(matcher, doc, i, matches):
    """Merge every matched phrase once the last match is reached.

    Merging changes the token indices, so nothing is merged while earlier
    matches are being reported. Only when this callback is invoked for the
    final entry of ``matches`` are all of the phrases merged in one pass.

    Args:
        matcher: The matcher invoking the callback (not used here).
        doc: The document that was matched; sliced as ``doc[start:end]``
            to obtain each phrase.
        i: Position of the current match within ``matches``.
        matches: Sequence of ``(ent_id, label, start, end)`` tuples.

    Returns:
        None. The work is done through ``span.merge(...)`` on each phrase.
    """
    if i != len(matches) - 1:
        return None
    # Slice out every span *before* merging anything: the start/end offsets
    # in `matches` were computed against the un-merged tokens.
    spans = [(label, doc[start:end]) for _ent_id, label, start, end in matches]
    for label, span in spans:
        # A truthy label gets the tag 'NNP'; otherwise the merged token
        # keeps the tag of the span's root.
        span.merge(label=label, tag='NNP' if label else span.root.tag_)

# Build the rule matcher on the vocabulary of the pipeline loaded above.
matcher = Matcher(nlp.vocab)

matcher.add_entity(
Expand All @@ -17,6 +29,7 @@ p.u-text-large spaCy features a rule-matching engine that operates over tokens.
acceptor=None, # Accept or modify the match
on_match=merge_phrases # Callback to act on the matches
)

matcher.add_pattern(
"GoogleNow", # Entity ID -- Created if doesn't exist.
[ # The pattern is a list of *Token Specifiers*.
Expand Down

0 comments on commit cf7b6f7

Please sign in to comment.