Skip to content

Commit

Permalink
Do not automatically assign entities to character names
Browse files Browse the repository at this point in the history
Assigning entities to character names is already handled by the manual annotations. Doing it automatically also has the side effect that the scope of some character names is assigned incorrectly.
  • Loading branch information
pagelj committed May 13, 2019
1 parent 5568b17 commit 5acc8a3
Showing 1 changed file with 1 addition and 40 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -68,46 +68,7 @@ public void process(JCas jcas) throws AnalysisEngineProcessException {
pronouns = new LinkedList<String>();
}

// Step 1: We search for each name in the full text (ignoring token
// boundaries etc.)
Drama d = JCasUtil.selectSingle(jcas, Drama.class);
Pattern p;
Matcher m;
for (CastFigure cf : JCasUtil.select(jcas, CastFigure.class)) {
for (int j = 0; j < cf.getNames().size(); j++) {
String name = cf.getNames(j);

p = Pattern.compile("\\b" + Pattern.quote(name) + "\\b", Pattern.CASE_INSENSITIVE);
m = p.matcher(jcas.getDocumentText());
while (m.find()) {
// If the found token (or multi-token) looks ok given their
// part of speech tags,
// we consider it a mention
if (!matches(jcas, m.start(), m.end())) {
try {
// If there is an existing mention annotation on this span, check if the entity
// is the same as the cast figure
Mention existingMention = JCasUtil.selectSingleAt(jcas, Mention.class, m.start(), m.end());
if (!(existingMention.getEntity().getId() == cf.getId())) {
// If it is not the same entity, create the mention
Mention fm = AnnotationFactory.createAnnotation(jcas, m.start(), m.end(),
Mention.class);
fm.setSurfaceString(ArrayUtil.toStringArray(jcas, fm.getCoveredText().split(" ")));
fm.setEntity(cf);
}
} catch (Exception e) {
// If there is no existing mention annotation on the same span, simply create
// the mention with the cast figure as entity
Mention fm = AnnotationFactory.createAnnotation(jcas, m.start(), m.end(), Mention.class);
fm.setSurfaceString(ArrayUtil.toStringArray(jcas, fm.getCoveredText().split(" ")));
fm.setEntity(cf);
}
}
}
}
}

// 2. We connect first person pronouns to their speakers
// We connect first person pronouns to their speakers
for (Utterance utterance : JCasUtil.select(jcas, Utterance.class)) {
Collection<CastFigure> figures = DramaUtil.getCastFigures(utterance);
for (CastFigure currentFigure : figures) {
Expand Down

0 comments on commit 5acc8a3

Please sign in to comment.