Package org.apache.uima.collection

Examples of org.apache.uima.collection.CollectionReader


   
    // Excerpt: reads the files of an input directory and runs them through the
    // RelationExtractorPrinter engine. The enclosing method is not shown here.

    // Populate the options from args (presumably the command-line arguments).
    Options options = new Options();
    options.parseOptions(args);

    // Every entry of the input directory becomes an input file.
    // NOTE(review): assuming inputDirectory is a java.io.File, listFiles() returns
    // null when the path is not a directory or an I/O error occurs, and
    // Arrays.asList would then throw a NullPointerException - confirm the caller
    // validates the directory.
    List<File> trainFiles = Arrays.asList(options.inputDirectory.listFiles());
    // getCollectionReader is defined outside this excerpt.
    CollectionReader collectionReader = getCollectionReader(trainFiles);
   
    // Instantiate RelationExtractorPrinter as a primitive analysis engine; no
    // configuration parameters are passed.
    AnalysisEngine relationExtractorPrinter = AnalysisEngineFactory.createPrimitive(
        RelationExtractorPrinter.class);
       
    // Run the reader through the single engine.
    SimplePipeline.runPipeline(collectionReader, relationExtractorPrinter);
View Full Code Here


    for(int i = 0; i < files.length; i++){
      paths[i] = files[i].getAbsolutePath();
    }
//    TypeSystemDescription typeSystem = TypeSystemDescriptionFactory.createTypeSystemDescriptionFromPath("../common-type-system/desc/common_type_system.xml");
    try {
      CollectionReader xmiReader = CollectionReaderFactory.createCollectionReader(XMIReader.class, XMIReader.PARAM_FILES, paths);
      AnalysisEngine treeConsumer = AnalysisEngineFactory.createPrimitive(TreeFeatureConsumer.class,
                          TreeFeatureConsumer.PARAM_OUTFILE, args[1],
                          TreeFeatureConsumer.PARAM_CLASSIFY_BOTH_DIRECTIONS, true);
      SimplePipeline.runPipeline(xmiReader, treeConsumer);
    } catch (ResourceInitializationException e) {
View Full Code Here

   
    // Excerpt: reads the files of an input directory and runs them through the
    // GoldAnnotationStatsCalculator engine. The enclosing method is not shown here.

    // Populate the options from args (presumably the command-line arguments).
    Options options = new Options();
    options.parseOptions(args);

    // Every entry of the input directory becomes an input file.
    // NOTE(review): assuming inputDirectory is a java.io.File, listFiles() returns
    // null when the path is not a directory or an I/O error occurs, and
    // Arrays.asList would then throw a NullPointerException - confirm the caller
    // validates the directory.
    List<File> trainFiles = Arrays.asList(options.inputDirectory.listFiles());
    // getCollectionReader is defined outside this excerpt.
    CollectionReader collectionReader = getCollectionReader(trainFiles);
   
    // Instantiate GoldAnnotationStatsCalculator as a primitive analysis engine; no
    // configuration parameters are passed.
    AnalysisEngine goldAnnotationStatsCalculator = AnalysisEngineFactory.createPrimitive(
        GoldAnnotationStatsCalculator.class);
       
    // Run the reader through the single engine.
    SimplePipeline.runPipeline(collectionReader, goldAnnotationStatsCalculator);
View Full Code Here

public class OpenNLPPOSCollectionReaderTests {

  @Test
  public void testReader() throws ResourceInitializationException, IOException, CollectionException {
    CollectionReader collectionReader = TestUtil.getCR(new File("desc/test/OpenNLPPOSCollectionReader.xml"));
    AnalysisEngine analysisEngine = TestUtil.getAE(new File("desc/test/NullAnnotator.xml"));
    JCas jCas = analysisEngine.newJCas();
    collectionReader.getNext(jCas.getCas());
   
    BaseToken baseToken = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, 0);
    assertEquals(0, baseToken.getBegin());
    assertEquals(1, baseToken.getEnd());
    assertEquals("A", baseToken.getCoveredText());
    assertEquals("A", baseToken.getPartOfSpeech());

    baseToken = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, 1);
    assertEquals("farmer", baseToken.getCoveredText());
    assertEquals("B", baseToken.getPartOfSpeech());
   
    baseToken = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, 2);
    assertEquals("went", baseToken.getCoveredText());
    assertEquals("CC", baseToken.getPartOfSpeech());
   
    baseToken = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, 3);
    assertEquals("trotting", baseToken.getCoveredText());
    assertEquals("DDD", baseToken.getPartOfSpeech());
   
    baseToken = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, 4);
    assertEquals("upon_A", baseToken.getCoveredText());
    assertEquals("E", baseToken.getPartOfSpeech());

    baseToken = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, 5);
    assertEquals("his", baseToken.getCoveredText());
    assertEquals("EE", baseToken.getPartOfSpeech());

    baseToken = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, 6);
    assertEquals(".", baseToken.getCoveredText());
    assertEquals(".", baseToken.getPartOfSpeech());

    CollectionException ce = null;
    jCas = analysisEngine.newJCas();
    try {
      collectionReader.getNext(jCas.getCas());
    } catch(CollectionException e) {
      ce = e;
    }
    assertNotNull(ce);
   
    jCas = analysisEngine.newJCas();
    collectionReader.getNext(jCas.getCas());

    baseToken = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, 0);
    assertEquals("A_", baseToken.getCoveredText());
    assertEquals("A", baseToken.getPartOfSpeech());
View Full Code Here

    assertEquals("_", baseToken.getPartOfSpeech());
  }
 
  @Test
  public void testLoadWordsOnly() throws ResourceInitializationException, IOException, CollectionException {
    CollectionReader collectionReader = TestUtil.getCR(new File("desc/test/OpenNLPPOSCollectionReader2.xml"));
    AnalysisEngine analysisEngine = TestUtil.getAE(new File("desc/test/NullAnnotator.xml"));
    JCas jCas = analysisEngine.newJCas();
    collectionReader.getNext(jCas.getCas());
   
    BaseToken baseToken = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, 0);
    assertEquals(0, baseToken.getBegin());
    assertEquals(1, baseToken.getEnd());
    assertEquals("A", baseToken.getCoveredText());
    assertNull(baseToken.getPartOfSpeech());

    baseToken = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, 1);
    assertEquals("farmer", baseToken.getCoveredText());
    assertNull(baseToken.getPartOfSpeech());
   
    baseToken = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, 2);
    assertEquals("went", baseToken.getCoveredText());
    assertNull(baseToken.getPartOfSpeech());
   
    baseToken = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, 3);
    assertEquals("trotting", baseToken.getCoveredText());
    assertNull(baseToken.getPartOfSpeech());
   
    baseToken = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, 4);
    assertEquals("upon_A", baseToken.getCoveredText());
    assertNull(baseToken.getPartOfSpeech());

    baseToken = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, 5);
    assertEquals("his", baseToken.getCoveredText());
    assertNull(baseToken.getPartOfSpeech());

    baseToken = TestUtil.getFeatureStructureAtIndex(jCas, BaseToken.class, 6);
    assertEquals(".", baseToken.getCoveredText());
    assertNull(baseToken.getPartOfSpeech());

    CollectionException ce = null;
    jCas = analysisEngine.newJCas();
    try {
      collectionReader.getNext(jCas.getCas());
    } catch(CollectionException e) {
      ce = e;
    }
    assertNotNull(ce);
   
View Full Code Here

    throws Exception
  {
    // Run the pipeline with different similarity measures
    for (int i = 1; i <= 3; i++)
    { 
      CollectionReader reader = createReader(PlainTextCombinationReader.class,
          PlainTextCombinationReader.PARAM_INPUT_DIR, "classpath:/datasets/test/plaintext",
          PlainTextCombinationReader.PARAM_LANGUAGE, "en",
          PlainTextCombinationReader.PARAM_COMBINATION_STRATEGY, CombinationStrategy.SAME_ROW_ONLY.toString());
 
      AnalysisEngineDescription seg = createEngineDescription(
View Full Code Here

public class GoldstandardCreator
{
  public static void outputGoldstandard(Dataset dataset)
    throws Exception
  {         
    CollectionReader reader = createReader(
                RTECorpusReader.class,
                RTECorpusReader.PARAM_COMBINATION_STRATEGY, CombinationStrategy.SAME_ROW_ONLY,
                RTECorpusReader.PARAM_INPUT_FILE, RteUtil.getInputFilePathForDataset(DATASET_DIR, dataset));
       
        AnalysisEngineDescription tagger = createEngineDescription(
View Full Code Here

    {     
      if (new File(OUTPUT_FEATURE_DIR + "/" + config.getTargetPath() + "/" + config.getMeasureName() + ".txt").exists())
      {
        System.out.println("Skipping: " + config.getMeasureName());
      } else {     
        CollectionReader reader = createReader(SemEvalCorpusReader.class,
            SemEvalCorpusReader.PARAM_INPUT_FILE, "classpath:/datasets/semeval/train/STS.input.ALLcombined.txt",
            SemEvalCorpusReader.PARAM_COMBINATION_STRATEGY, CombinationStrategy.SAME_ROW_ONLY.toString());
   
        AnalysisEngineDescription seg = createEngineDescription(
            BreakIteratorSegmenter.class);
View Full Code Here

      {
        System.out.println(" - skipped, feature already generated");
      }
      else
      {     
        CollectionReader reader = createReader(SemEvalCorpusReader.class,
            SemEvalCorpusReader.PARAM_INPUT_FILE, DATASET_DIR + "/" + mode.toString().toLowerCase() + "/STS.input." + dataset.toString() + ".txt",
            SemEvalCorpusReader.PARAM_COMBINATION_STRATEGY, CombinationStrategy.SAME_ROW_ONLY.toString(), SemEvalCorpusReader.PARAM_LANGUAGE, "en");
   
        // Tokenization
        AnalysisEngineDescription seg = createEngineDescription(
View Full Code Here

  public static void runLinearRegression(Dataset train, Dataset... test)
    throws UIMAException, IOException
  {
    for (Dataset dataset : test)
    {
      CollectionReader reader = createReader(SemEvalCorpusReader.class,
          SemEvalCorpusReader.PARAM_INPUT_FILE, DATASET_DIR + "/test/STS.input." + dataset.toString() + ".txt",
          SemEvalCorpusReader.PARAM_LANGUAGE, "en",
          SemEvalCorpusReader.PARAM_COMBINATION_STRATEGY, CombinationStrategy.SAME_ROW_ONLY.toString());
     
      AnalysisEngineDescription seg = createEngineDescription(BreakIteratorSegmenter.class);
View Full Code Here

TOP

Related Classes of org.apache.uima.collection.CollectionReader

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.