Package org.apache.uima.collection

Examples of org.apache.uima.collection.CollectionReader


  private static final String OUTPUT_FILE = "target/output.txt";

  public static void main(String[] args)
    throws Exception
  {
    CollectionReader reader = createReader(ShortAnswerGradingReader.class,
        ShortAnswerGradingReader.PARAM_INPUT_DIR, "classpath:/datasets/mm09",
        ShortAnswerGradingReader.PARAM_DOCUMENT_IDS, "sequential",
        ShortAnswerGradingReader.PARAM_COMBINATION_STRATEGY, CombinationStrategy.SAME_ROW_ONLY.toString());
   
//    CollectionReader reader = createCollectionReader(PlainTextCombinationReader.class,
View Full Code Here


    {     
      if (new File(OUTPUT_FEATURE_DIR + "/" + config.getTargetPath() + "/" + config.getMeasureName() + ".txt").exists())
      {
        System.out.println("Skipping: " + config.getMeasureName());
      } else {     
        CollectionReader reader = createReader(ShortAnswerGradingReader.class,
            ShortAnswerGradingReader.PARAM_INPUT_DIR, "classpath:/datasets/mm09",
            ShortAnswerGradingReader.PARAM_COMBINATION_STRATEGY, CombinationStrategy.SAME_ROW_ONLY.toString());
   
        AnalysisEngineDescription seg = createEngineDescription(BreakIteratorSegmenter.class);
       
View Full Code Here

public class OutputGoldstandard
{
  public static void main(String[] args)
    throws Exception
  {
    CollectionReader reader = createReader(ShortAnswerGradingReader.class,
        ShortAnswerGradingReader.PARAM_INPUT_DIR, "classpath:/datasets/mm09",
        ShortAnswerGradingReader.PARAM_COMBINATION_STRATEGY, CombinationStrategy.SAME_ROW_ONLY.toString());
   
    AnalysisEngine writer = createEngine(OutputGoldstandardWriter.class,
        OutputGoldstandardWriter.PARAM_OUTPUT_FILE, "target/mm09-goldstandard.txt");
View Full Code Here

    @Test
    public void testIndexCreation()
        throws Exception
    {
    
        CollectionReader reader = createReader(
                TextReader.class,
                TextReader.PARAM_SOURCE_LOCATION, "src/test/resources/input/",
                TextReader.PARAM_PATTERNS, "*.txt");
       
        AnalysisEngine segmenter = createEngine(
View Full Code Here

  }
 
  public static void generateDocumentOrder(Dataset dataset)
    throws UIMAException, IOException
  {
    CollectionReader reader = getCollectionReader(dataset);
   
    AnalysisEngine writer = AnalysisEngineFactory.createPrimitive(
        DocumentOrderWriter.class,
        DocumentOrderWriter.PARAM_OUTPUT_FILE, UTILS_DIR + "/doc-order/" + dataset.toString() + ".txt");
   
View Full Code Here

    }
    else
   
      System.out.println(" - this may take a while...");
   
      CollectionReader reader = ColingUtils.getCollectionReader(dataset);
     
      // Tokenization
      AnalysisEngineDescription seg = createPrimitiveDescription(
          BreakIteratorSegmenter.class);
      AggregateBuilder builder = new AggregateBuilder();
View Full Code Here

      {
        System.out.println(" - skipped, feature already generated");
      }
      else
      {     
        CollectionReader reader = ColingUtils.getCollectionReader(dataset);
   
        // Tokenization
        AnalysisEngineDescription seg = createPrimitiveDescription(
            BreakIteratorSegmenter.class);
        AggregateBuilder builder = new AggregateBuilder();
View Full Code Here

    }
    else
   
      System.out.println(" - this may take a while...");
   
      CollectionReader reader = ColingUtils.getCollectionReader(dataset);
     
      // Tokenization
      AnalysisEngineDescription seg = createPrimitiveDescription(
          BreakIteratorSegmenter.class);
      AggregateBuilder builder = new AggregateBuilder();
View Full Code Here

    throws ResourceInitializationException
  {
    switch(dataset)
    {
    case WikipediaRewriteCorpus:
      CollectionReader clough = CollectionReaderFactory.createCollectionReader(
        CloughCorpusReader.class,
        CloughCorpusReader.PARAM_INPUT_DIR, getDatasetDir(dataset).getAbsolutePath(),
        CloughCorpusReader.PARAM_COMBINATION_STRATEGY, CombinationStrategy.SAME_ROW_ONLY.toString());
      return clough;
    case MeterCorpus:
      CollectionReader meter = CollectionReaderFactory.createCollectionReader(
        MeterCorpusReader.class,
        MeterCorpusReader.PARAM_INPUT_DIR, getDatasetDir(dataset).getAbsolutePath(),
        MeterCorpusReader.PARAM_COLLECTION, MeterCorpusReader.MeterCorpusCollection.SINGLE_SOURCE_SUBSET.toString(),
        MeterCorpusReader.PARAM_COMBINATION_STRATEGY, CombinationStrategy.SAME_ROW_ONLY.toString());
      return meter;
    case WebisCrowdParaphraseCorpus:
      CollectionReader webis = CollectionReaderFactory.createCollectionReader(
        SemEvalCorpusReader.class,
        SemEvalCorpusReader.PARAM_INPUT_FILE, getDatasetDir(dataset).getAbsolutePath() + "/webis-cpc.txt",
        SemEvalCorpusReader.PARAM_COMBINATION_STRATEGY, CombinationStrategy.SAME_ROW_ONLY.toString());
      return webis;
    default:
View Full Code Here

  public static void runLinearRegression(Dataset train, Dataset... test)
    throws UIMAException, IOException
  {
    for (Dataset dataset : test)
    {
      CollectionReader reader = createReader(SemEvalCorpusReader.class,
          SemEvalCorpusReader.PARAM_INPUT_FILE, DATASET_DIR + "/test/STS.input." + dataset.toString() + ".txt",
          SemEvalCorpusReader.PARAM_COMBINATION_STRATEGY, CombinationStrategy.SAME_ROW_ONLY.toString());
     
      AnalysisEngineDescription seg = createEngineDescription(BreakIteratorSegmenter.class);
     
View Full Code Here

TOP

Related Classes of org.apache.uima.collection.CollectionReader

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.