Skip to content

Commit

Permalink
OPENNLP-855: New SentimentAnalysisParser
Browse files Browse the repository at this point in the history
  • Loading branch information
amensiko authored and mawiesne committed Jan 10, 2025
1 parent e86b47f commit e46c576
Show file tree
Hide file tree
Showing 20 changed files with 1,832 additions and 0 deletions.
8 changes: 8 additions & 0 deletions opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@
import opennlp.tools.cmdline.sentdetect.SentenceDetectorEvaluatorTool;
import opennlp.tools.cmdline.sentdetect.SentenceDetectorTool;
import opennlp.tools.cmdline.sentdetect.SentenceDetectorTrainerTool;
import opennlp.tools.cmdline.sentiment.SentimentCrossValidatorTool;
import opennlp.tools.cmdline.sentiment.SentimentEvaluatorTool;
import opennlp.tools.cmdline.sentiment.SentimentTrainerTool;
import opennlp.tools.cmdline.tokenizer.DictionaryDetokenizerTool;
import opennlp.tools.cmdline.tokenizer.SimpleTokenizerTool;
import opennlp.tools.cmdline.tokenizer.TokenizerConverterTool;
Expand Down Expand Up @@ -165,6 +168,11 @@ public final class CLI {

// Entity Linker
tools.add(new EntityLinkerTool());

// Sentiment Analysis Parser
tools.add(new SentimentTrainerTool());
tools.add(new SentimentEvaluatorTool());
tools.add(new SentimentCrossValidatorTool());

// Language Model
tools.add(new NGramLanguageModelTool());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import opennlp.tools.formats.NameSampleDataStreamFactory;
import opennlp.tools.formats.ParseSampleStreamFactory;
import opennlp.tools.formats.SentenceSampleStreamFactory;
import opennlp.tools.formats.SentimentSampleStreamFactory;
import opennlp.tools.formats.TokenSampleStreamFactory;
import opennlp.tools.formats.TwentyNewsgroupSampleStreamFactory;
import opennlp.tools.formats.WordTagSampleStreamFactory;
Expand Down Expand Up @@ -140,6 +141,8 @@ public final class StreamFactoryRegistry {
MascPOSSampleStreamFactory.registerFactory();
MascSentenceSampleStreamFactory.registerFactory();
MascTokenSampleStreamFactory.registerFactory();

SentimentSampleStreamFactory.registerFactory();
}

public static final String DEFAULT_FORMAT = "opennlp";
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package opennlp.tools.cmdline.sentiment;

import java.io.IOException;
import java.util.LinkedList;
import java.util.List;

import opennlp.tools.cmdline.AbstractCrossValidatorTool;
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.TerminateToolException;
import opennlp.tools.cmdline.params.BasicTrainingParams;
import opennlp.tools.cmdline.params.CVParams;
import opennlp.tools.cmdline.sentiment.SentimentCrossValidatorTool.CVToolParams;
import opennlp.tools.sentiment.SentimentCrossValidator;
import opennlp.tools.sentiment.SentimentEvaluationMonitor;
import opennlp.tools.sentiment.SentimentFactory;
import opennlp.tools.sentiment.SentimentSample;
import opennlp.tools.util.eval.EvaluationMonitor;
import opennlp.tools.util.model.ModelUtil;

/**
* Class for helping perform cross validation on the Sentiment Analysis Parser.
*/
public class SentimentCrossValidatorTool
extends AbstractCrossValidatorTool<SentimentSample, CVToolParams> {

/**
* Interface for parameters
*/
interface CVToolParams extends BasicTrainingParams, CVParams {

}

/**
* Constructor
*/
public SentimentCrossValidatorTool() {
super(SentimentSample.class, CVToolParams.class);
}

/**
* Returns the short description of the tool
*
* @return short description
*/
public String getShortDescription() {
return "K-fold cross validator for the learnable Sentiment Analysis Parser";
}

/**
* Runs the tool
*
* @param format
* the format to be used
* @param args
* the arguments
*/
public void run(String format, String[] args) {
super.run(format, args);

mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), true);
if (mlParams == null) {
mlParams = ModelUtil.createDefaultTrainingParameters();
}

List<EvaluationMonitor<SentimentSample>> listeners = new LinkedList<>();
if (params.getMisclassified()) {
listeners.add(new SentimentEvaluationErrorListener());
}
SentimentDetailedFMeasureListener detailedFListener = null;
SentimentFactory sentimentFactory = new SentimentFactory();

SentimentCrossValidator validator;
try {
validator = new SentimentCrossValidator(params.getLang(), mlParams, sentimentFactory,
listeners.toArray(new SentimentEvaluationMonitor[listeners.size()]));
validator.evaluate(sampleStream, params.getFolds());
} catch (IOException e) {
throw new TerminateToolException(-1,
"IO error while reading training data or indexing data: "
+ e.getMessage(),
e);
} finally {
try {
sampleStream.close();
} catch (IOException e) {
// sorry that this can fail
}
}

System.out.println("done");

System.out.println();

if (detailedFListener == null) {
System.out.println(validator.getFMeasure());
} else {
System.out.println(detailedFListener.toString());
}
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package opennlp.tools.cmdline.sentiment;

import opennlp.tools.cmdline.DetailedFMeasureListener;
import opennlp.tools.sentiment.SentimentEvaluationMonitor;
import opennlp.tools.sentiment.SentimentSample;
import opennlp.tools.util.Span;

/**
 * Detailed F-Measure listener for the evaluation of {@link SentimentSample}s.
 */
public class SentimentDetailedFMeasureListener
    extends DetailedFMeasureListener<SentimentSample>
    implements SentimentEvaluationMonitor {

  /**
   * Returns the sentiment sample as a span array.
   * <p>
   * No spans are derived from the sample at present, so an empty array is
   * returned instead of {@code null}; the result can be iterated without a
   * null check.
   *
   * @param sample
   *          the sentiment sample to be converted
   * @return an empty, never {@code null}, span array
   */
  @Override
  protected Span[] asSpanArray(SentimentSample sample) {
    return new Span[0];
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package opennlp.tools.cmdline.sentiment;

import java.io.OutputStream;

import opennlp.tools.cmdline.EvaluationErrorPrinter;
import opennlp.tools.sentiment.SentimentEvaluationMonitor;
import opennlp.tools.sentiment.SentimentSample;
import opennlp.tools.util.eval.EvaluationMonitor;

/**
 * Evaluation listener that prints misclassified sentiment samples.
 * <p>
 * The class implements {@code SentimentEvaluationMonitor} in addition to
 * {@link EvaluationMonitor} so that instances can be stored in a
 * {@code SentimentEvaluationMonitor[]} (as the sentiment cross validator tool
 * does) without causing an {@link ArrayStoreException}.
 */
public class SentimentEvaluationErrorListener
    extends EvaluationErrorPrinter<SentimentSample>
    implements EvaluationMonitor<SentimentSample>, SentimentEvaluationMonitor {

  /**
   * Creates a listener that prints to {@code System.err}.
   */
  public SentimentEvaluationErrorListener() {
    super(System.err);
  }

  /**
   * Creates a listener that prints to the given stream.
   *
   * @param outputStream
   *          the stream the errors are written to
   */
  protected SentimentEvaluationErrorListener(OutputStream outputStream) {
    super(outputStream);
  }

  /**
   * Prints the error in case of a misclassification in the evaluator.
   *
   * @param reference
   *          the sentiment sample reference to be used
   * @param prediction
   *          the sentiment sample prediction
   */
  @Override
  public void misclassified(SentimentSample reference,
      SentimentSample prediction) {
    printError(new String[] { reference.getSentiment() },
        new String[] { prediction.getSentiment() }, reference, prediction,
        reference.getSentence());
  }

}
Loading

0 comments on commit e46c576

Please sign in to comment.