/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.ctakes.relationextractor.eval;
import java.io.File;
import java.util.Collection;
import java.util.List;
import javax.annotation.Nullable;
import org.apache.ctakes.relationextractor.ae.ModifierExtractorAnnotator;
import org.apache.ctakes.relationextractor.eval.SHARPXMI.EvaluationOptions;
import org.apache.ctakes.typesystem.type.textsem.Modifier;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CASException;
import org.apache.uima.collection.CollectionReader;
import org.apache.uima.jcas.JCas;
import org.cleartk.classifier.jar.DefaultDataWriterFactory;
import org.cleartk.classifier.jar.DirectoryDataWriterFactory;
import org.cleartk.classifier.jar.GenericJarClassifierFactory;
import org.cleartk.classifier.jar.JarClassifierBuilder;
import org.cleartk.classifier.liblinear.LIBLINEARStringOutcomeDataWriter;
import org.cleartk.eval.AnnotationStatistics;
import org.uimafit.component.JCasAnnotator_ImplBase;
import org.uimafit.factory.AnalysisEngineFactory;
import org.uimafit.pipeline.JCasIterable;
import org.uimafit.pipeline.SimplePipeline;
import org.uimafit.util.JCasUtil;
import com.google.common.base.Function;
import com.google.common.collect.Lists;
import com.lexicalscope.jewel.cli.CliFactory;
/**
 * Evaluation driver for {@link ModifierExtractorAnnotator}.
 * <p>
 * Training runs the annotator in data-writing mode over documents whose
 * default view has been populated with the manually annotated
 * {@link Modifier}s, then packages the model. Testing runs the packaged model
 * and scores the {@link Modifier}s found in the default view against those in
 * the {@link SHARPXMI#GOLD_VIEW_NAME} view.
 */
public class ModifierExtractorEvaluation extends SHARPXMI.Evaluation_ImplBase {

  /**
   * Settings passed to {@link SHARPXMI#evaluate} alongside the search grid:
   * the LIBLINEAR data writer with training arguments {@code -s 1 -c 0.5}.
   */
  public static final ParameterSettings BEST_PARAMETERS = new ParameterSettings(
      LIBLINEARStringOutcomeDataWriter.class,
      new String[] { "-s", "1", "-c", "0.5" });

  /**
   * Command-line entry point: parses and validates the options, generates XMI
   * if necessary, builds the parameter grid and runs the evaluation.
   *
   * @param args command-line arguments described by {@link EvaluationOptions}
   * @throws Exception if option handling, XMI generation or the evaluation fails
   */
  public static void main(String[] args) throws Exception {
    // parse the options, validate them, and generate XMI if necessary
    final EvaluationOptions options = CliFactory.parseArguments(EvaluationOptions.class, args);
    SHARPXMI.validate(options);
    SHARPXMI.generateXMI(options);

    // determine the grid of parameters to search through
    // for the full set of LIBLINEAR parameters, see:
    // https://github.com/bwaldvogel/liblinear-java/blob/master/src/main/java/de/bwaldvogel/liblinear/Train.java
    List<ParameterSettings> gridOfSettings = Lists.newArrayList();
    for (int solver : new int[] { 0 /* logistic regression */, 1 /* SVM */}) {
      for (double svmCost : new double[] { 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50, 100 }) {
        gridOfSettings.add(new ParameterSettings(
            LIBLINEARStringOutcomeDataWriter.class,
            new String[] { "-s", String.valueOf(solver), "-c", String.valueOf(svmCost) }));
      }
    }

    // run the evaluation
    SHARPXMI.evaluate(
        options,
        BEST_PARAMETERS,
        gridOfSettings,
        new Function<ParameterSettings, ModifierExtractorEvaluation>() {
          @Override
          public ModifierExtractorEvaluation apply(@Nullable ParameterSettings params) {
            return new ModifierExtractorEvaluation(new File("target/models/modifier"), params);
          }
        });
  }

  /** Data writer class and training arguments used by {@link #train}. */
  private final ParameterSettings parameterSettings;

  /**
   * Creates an evaluation for one parameter configuration.
   *
   * @param directory         directory handed to the base evaluation class
   * @param parameterSettings data writer class and training arguments used when training
   */
  public ModifierExtractorEvaluation(File directory, ParameterSettings parameterSettings) {
    super(directory);
    this.parameterSettings = parameterSettings;
  }

  /**
   * Writes training data for the modifier extractor and packages a model.
   * <p>
   * Each document first passes through {@link OnlyGoldModifiers}, then through
   * {@link ModifierExtractorAnnotator} configured with the data writer class
   * from the parameter settings and {@code directory} as its output directory.
   *
   * @param collectionReader reader supplying the training documents
   * @param directory        directory receiving the training data and the packaged model
   * @throws Exception if the pipeline or model training fails
   */
  @Override
  public void train(CollectionReader collectionReader, File directory) throws Exception {
    // report which configuration is being trained
    System.err.printf("%s: %s:\n", this.getClass().getSimpleName(), directory.getName());
    System.err.println(this.parameterSettings);

    SimplePipeline.runPipeline(
        collectionReader,
        AnalysisEngineFactory.createPrimitiveDescription(OnlyGoldModifiers.class),
        ModifierExtractorAnnotator.getDescription(
            DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
            this.parameterSettings.dataWriterClass,
            DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
            directory.getPath()));
    JarClassifierBuilder.trainAndPackage(directory, this.parameterSettings.trainingArguments);
  }

  /**
   * Runs the packaged model over the test documents and scores its output.
   * <p>
   * For every document, the {@link Modifier}s in the
   * {@link SHARPXMI#GOLD_VIEW_NAME} view are the reference and the
   * {@link Modifier}s in the default view are the system output. The
   * accumulated statistics are printed to standard error before being returned.
   *
   * @param collectionReader reader supplying the test documents
   * @param directory        directory containing the packaged model jar
   * @return statistics accumulated over all test documents
   * @throws Exception if the annotator cannot be created or a document cannot be processed
   */
  @Override
  protected AnnotationStatistics<String> test(CollectionReader collectionReader, File directory)
      throws Exception {
    AnalysisEngine classifierAnnotator =
        AnalysisEngineFactory.createPrimitive(ModifierExtractorAnnotator.getDescription(
            GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
            JarClassifierBuilder.getModelJarFile(directory)));

    AnnotationStatistics<String> stats = new AnnotationStatistics<String>();
    for (JCas jCas : new JCasIterable(collectionReader, classifierAnnotator)) {
      JCas goldView;
      try {
        goldView = jCas.getView(SHARPXMI.GOLD_VIEW_NAME);
      } catch (CASException e) {
        throw new AnalysisEngineProcessException(e);
      }
      Collection<Modifier> goldModifiers = JCasUtil.select(goldView, Modifier.class);
      Collection<Modifier> systemModifiers = JCasUtil.select(jCas, Modifier.class);
      stats.add(goldModifiers, systemModifiers);
    }

    System.err.print(stats);
    System.err.println();
    return stats;
  }

  /**
   * Class that copies the manual {@link Modifier} annotations to the default
   * CAS, replacing any {@link Modifier}s already indexed there.
   */
  public static class OnlyGoldModifiers extends JCasAnnotator_ImplBase {

    /**
     * Replaces the {@link Modifier}s of the given view with copies of those in
     * the {@link SHARPXMI#GOLD_VIEW_NAME} view.
     *
     * @param jCas the view whose {@link Modifier}s are replaced
     * @throws AnalysisEngineProcessException if the gold view cannot be obtained
     */
    @Override
    public void process(JCas jCas) throws AnalysisEngineProcessException {
      JCas goldView;
      try {
        goldView = jCas.getView(SHARPXMI.GOLD_VIEW_NAME);
      } catch (CASException e) {
        throw new AnalysisEngineProcessException(e);
      }

      // remove any automatically generated Modifiers; iterate over a snapshot
      // because the selection may be a live view of the CAS index, and removing
      // annotations from an index while iterating over it is unsafe
      for (Modifier modifier : Lists.newArrayList(JCasUtil.select(jCas, Modifier.class))) {
        modifier.removeFromIndexes();
      }

      // copy over the manually annotated Modifiers
      for (Modifier goldModifier : JCasUtil.select(goldView, Modifier.class)) {
        Modifier modifier = new Modifier(jCas, goldModifier.getBegin(), goldModifier.getEnd());
        modifier.setTypeID(goldModifier.getTypeID());
        modifier.setId(goldModifier.getId());
        modifier.setDiscoveryTechnique(goldModifier.getDiscoveryTechnique());
        modifier.setConfidence(goldModifier.getConfidence());
        modifier.addToIndexes();
      }
    }
  }
}