Source Code of org.apache.ctakes.dependency.parser.ae.util.WriteClearNLPDescriptors$Options

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.ctakes.dependency.parser.ae.util;


import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;


import org.apache.ctakes.dependency.parser.ae.ClearNLPDependencyParserAE;
import org.apache.ctakes.dependency.parser.ae.ClearNLPSemanticRoleLabelerAE;
import org.apache.uima.UIMAException;
import org.apache.uima.UIMAFramework;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.metadata.TypeSystemDescription;
import org.apache.uima.util.InvalidXMLException;
import org.apache.uima.util.XMLInputSource;
import org.apache.uima.util.XMLParser;
import org.cleartk.util.Options_ImplBase;
import org.kohsuke.args4j.Option;
import org.uimafit.factory.AggregateBuilder;
import org.uimafit.factory.AnalysisEngineFactory;
import org.uimafit.factory.TypeSystemDescriptionFactory;
import org.xml.sax.SAXException;






/**
 * Use this to generate UIMA xml description files for the ClearNLP analysis engines.
 *
 */
public class WriteClearNLPDescriptors {
  public static final String SIMPLE_SEGMENTER_PATH = "../ctakes-clinical-pipeline/desc/analysis_engine/SimpleSegmentAnnotator.xml";
  public static final String SENTENCE_DETECTOR_PATH="../ctakes-core/desc/analysis_engine/SentenceDetectorAnnotator.xml";
  public static final String TOKENIZER_PATH="../ctakes-core/desc/analysis_engine/TokenizerAnnotator.xml";
  public static final String LVG_BASE_TOKEN_ANNOTATOR_PATH="desc/analysis_engine/LvgBaseTokenAnnotator.xml";
  public static final String POS_TAGGER_PATH="../ctakes-pos-tagger/desc/POSTagger.xml";
  public static final String DEP_NAME="ClearNLPDependencyParser";
  public static final String SRL_NAME="ClearNLPSRL";


  public static class Options extends Options_ImplBase {
    @Option(name = "-o",
        aliases = "--outputRoot",
        usage = "specify the directory to write out descriptor files",
        required = false)
    public File outputRoot = new File("desc/analysis_engine");


    @Option(name = "-m",
        aliases = "--modelFile",
        usage = "specify the path to the relation extractor model jar file",
        required = false)
    public File modelFile = new File("model.jar");
  }




  /**
   * @param args
   * @throws IOException 
   * @throws UIMAException 
   * @throws SAXException 
   */
  public static void main(String[] args) throws IOException, UIMAException, SAXException {
    Options options = new Options();
    options.parseOptions(args);


    TypeSystemDescription typeSystem = TypeSystemDescriptionFactory.createTypeSystemDescriptionFromPath("../../../ctakes-type-system/src/main/resources/org/apache/ctakes/typesystem/types/TypeSystem.xml");


    AnalysisEngineDescription ClearNLPSRLDesc = AnalysisEngineFactory.createPrimitiveDescription(
        ClearNLPSemanticRoleLabelerAE.class,
        typeSystem
        );


    AnalysisEngineDescription ClearNLPDepParserDesc = AnalysisEngineFactory.createPrimitiveDescription(
        ClearNLPDependencyParserAE.class,
        typeSystem
        );
    


    System.out.println((new File("desc/analysis_engine")).getAbsolutePath());


    // Write standalone description files
    ClearNLPDepParserDesc.toXML(new FileOutputStream(new File(options.outputRoot, "ClearNLPDependencyParserAE.xml")));
    ClearNLPSRLDesc.toXML(new FileOutputStream(new File(options.outputRoot, "ClearNLPSemanticRoleLabelerAE.xml")));


    // Write aggregate plaintext description files
    AggregateBuilder aggregateBuilder = getPlaintextAggregateBuilder();
    writeAggregateDescriptions(aggregateBuilder, ClearNLPDepParserDesc, ClearNLPSRLDesc, options.outputRoot, "PlaintextAggregate.xml");


    // Write aggregate tokenized description files
    aggregateBuilder = getTokenizedAggregateBuilder();
    writeAggregateDescriptions(aggregateBuilder, ClearNLPDepParserDesc, ClearNLPSRLDesc, options.outputRoot, "TokenizedAggregate.xml");


    // Write aggregate tokenizedInf description files
    aggregateBuilder = getTokenizedInfPosAggregateBuilder();
    writeAggregateDescriptions(aggregateBuilder, ClearNLPDepParserDesc, ClearNLPSRLDesc, options.outputRoot, "TokenizedInfPosAggregate.xml");


  }


  /**
   * Builds the plaintext prepreprocessing pipeline for ClearNLP
   * @return
   * @throws InvalidXMLException
   * @throws IOException
   */
  public static AggregateBuilder getPlaintextAggregateBuilder() throws InvalidXMLException, IOException {
    AggregateBuilder aggregateBuilder = new AggregateBuilder();
    aggregateBuilder.add(loadDescription(SIMPLE_SEGMENTER_PATH));
    aggregateBuilder.add(loadDescription(SENTENCE_DETECTOR_PATH));
    aggregateBuilder.add(loadDescription(TOKENIZER_PATH));
    aggregateBuilder.add(loadDescription(LVG_BASE_TOKEN_ANNOTATOR_PATH));
    aggregateBuilder.add(loadDescription(POS_TAGGER_PATH));
    return aggregateBuilder;
  }




  /**
   * Builds the tokenized preprocessing pipeline for ClearNLP
   * @return
   * @throws InvalidXMLException
   * @throws IOException
   */
  public static AggregateBuilder getTokenizedAggregateBuilder() throws InvalidXMLException, IOException {
    AggregateBuilder aggregateBuilder = new AggregateBuilder();
    aggregateBuilder.add(loadDescription(SIMPLE_SEGMENTER_PATH));
    aggregateBuilder.add(loadDescription(LVG_BASE_TOKEN_ANNOTATOR_PATH));
    return aggregateBuilder;
  }


  /**
   * Builds the tokenizedInf preprocessing for ClearNLP
   * @return
   * @throws InvalidXMLException
   * @throws IOException
   */
  public static AggregateBuilder getTokenizedInfPosAggregateBuilder() throws InvalidXMLException, IOException {
    AggregateBuilder aggregateBuilder = new AggregateBuilder();
    aggregateBuilder.add(loadDescription(SIMPLE_SEGMENTER_PATH));
    aggregateBuilder.add(loadDescription(LVG_BASE_TOKEN_ANNOTATOR_PATH));
    aggregateBuilder.add(loadDescription(POS_TAGGER_PATH));
    return aggregateBuilder;
  }




  /**
   * Simple method to load xml description and return an AnalysisEngineDescription object
   * @param pathToDescription
   * @return
   * @throws IOException
   * @throws InvalidXMLException
   */
  public static AnalysisEngineDescription loadDescription(String pathToDescription) throws IOException, InvalidXMLException {
    File file = new File(pathToDescription);
    XMLParser parser = UIMAFramework.getXMLParser();
    XMLInputSource source = new XMLInputSource(file);
    AnalysisEngineDescription desc = parser.parseAnalysisEngineDescription(source);
    return desc;
  }
  
  private static void writeAggregateDescriptions(
      AggregateBuilder preprocessing, 
      AnalysisEngineDescription ClearNLPDepParserDesc, 
      AnalysisEngineDescription ClearNLPSRLDesc,
      File outputRoot,
      String aggregateSuffix) throws ResourceInitializationException, FileNotFoundException, SAXException, IOException {


    // Append Dependency Parser into aggregate and write description file
    preprocessing.add(ClearNLPDepParserDesc);
    preprocessing.createAggregateDescription().toXML(new FileOutputStream(new File(outputRoot, DEP_NAME + aggregateSuffix))); 
    // Append SRL Parser into aggregate and write description file
    preprocessing.add(ClearNLPSRLDesc);
    preprocessing.createAggregateDescription().toXML(new FileOutputStream(new File(outputRoot, SRL_NAME + aggregateSuffix))); 


  }




}
Source Code of org.apache.ctakes.dependency.parser.ae.util.WriteClearNLPDescriptors$Options

Related Classes of org.apache.ctakes.dependency.parser.ae.util.WriteClearNLPDescriptors$Options