Package org.cleartk.classifier.feature.extractor

Examples of org.cleartk.classifier.feature.extractor.CleartkExtractor


        charExtractors,
        new TypePathExtractor(BaseToken.class, "partOfSpeech"));
   
    featureExtractors = new ArrayList<SimpleFeatureExtractor>();
//    featureExtractors.add(new CleartkExtractor(BaseToken.class, new CoveredTextExtractor(), new Bag(new Covered())));
    featureExtractors.add(new CleartkExtractor(BaseToken.class, allExtractors, new Bag(new Covered())));
//    featureExtractors.add(charExtractors);
    wordTypeExtractor = new CleartkExtractor(BaseToken.class, new TimeWordTypeExtractor(), new Bag(new Covered()));
//    featureExtractors.add(new CleartkExtractor(BaseToken.class, new CoveredTextExtractor(), new Bag(new Preceding(1))));
//   featureExtractors.add(new CleartkExtractor(BaseToken.class, new CoveredTextExtractor(), new Bag(new Following(1))));
    // bag of constituent descendent labels
//    featureExtractors.add(new CleartkExtractor(TreebankNode.class, new TypePathExtractor(TreebankNode.class, "nodeType"), new Bag(new Covered())));
   
View Full Code Here


    // a list of feature extractors that require the token and the sentence
//    this.contextFeatureExtractors = new ArrayList<CleartkExtractor>();
   
    this.tokenCleartkExtractors = new ArrayList<CleartkExtractor>();

    CleartkExtractor tokenExtraction1 =
        new CleartkExtractor(
            BaseToken.class,
//            new FeatureFunctionExtractor(new CoveredTextExtractor(), new LowerCaseFeatureFunction()),
//            new FeatureFunctionExtractor(new CoveredTextExtractor(), new BrownClusterFeatureFunction()),
            new CoveredTextExtractor(),
            //new CleartkExtractor.Covered(),
            new CleartkExtractor.LastCovered(2),
            new CleartkExtractor.Preceding(5),
            new CleartkExtractor.Following(4),
            new CleartkExtractor.Bag(new CleartkExtractor.Preceding(3)),
            new CleartkExtractor.Bag(new CleartkExtractor.Following(3)),
            new CleartkExtractor.Bag(new CleartkExtractor.Preceding(5)),
            new CleartkExtractor.Bag(new CleartkExtractor.Following(5)),
            new CleartkExtractor.Bag(new CleartkExtractor.Preceding(10)),
            new CleartkExtractor.Bag(new CleartkExtractor.Following(10))
            );
   
//    CleartkExtractor posExtraction1 =
//        new CleartkExtractor(
//            BaseToken.class,
//            new TypePathExtractor(BaseToken.class, "partOfSpeech"),
//            new CleartkExtractor.LastCovered(2),
//            new CleartkExtractor.Preceding(3),
//            new CleartkExtractor.Following(2)
//            );

    this.tokenCleartkExtractors.add(tokenExtraction1);
    //this.tokenCleartkExtractors.add(posExtraction1);
   
//    this.contextFeatureExtractors.add(new CleartkExtractor(IdentifiedAnnotation.class,
//        new CoveredTextExtractor(),
//        //new TypePathExtractor(IdentifiedAnnotation.class, "stem"),
//        new Preceding(2),
//        new Following(2)));
   
    // stab at dependency-based features
    //List<Feature> features = new ArrayList<Feature>();
    //ConllDependencyNode node1 = findAnnotationHead(jCas, arg1);

    CombinedExtractor baseExtractorCuePhraseCategory =
        new CombinedExtractor
          (
           new CoveredTextExtractor(),
           new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhrase"),
           new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseCategory"),
           new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseAssertionFamily")
          );
   
    cuePhraseInWindowExtractor = new CleartkExtractor(
        BaseToken.class,
        new CoveredTextExtractor(),
        new CleartkExtractor.Bag(new CleartkExtractor.Covered())
//          AssertionCuePhraseAnnotation.class,
//          baseExtractorCuePhraseCategory,
View Full Code Here

           new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhrase"),
           new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseCategory"),
           new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseAssertionFamily")
          );
   
    CleartkExtractor cuePhraseInWindowExtractor =
        new CleartkExtractor(
              AssertionCuePhraseAnnotation.class,
              baseExtractorCuePhraseCategory,
              new CleartkExtractor.Bag(new CleartkExtractor.Preceding(5)),
              new CleartkExtractor.Bag(new CleartkExtractor.Following(5))
              );
//              new CleartkExtractor.Ngram(new CleartkExtractor.Preceding(5), new CleartkExtractor.Following(5)));
              //new CoveredTextExtractor(),
//              new CleartkExtractor.Covered());
//              new CleartkExtractor.Preceding(5),
//              new CleartkExtractor.Following(5));
   
    CleartkExtractor tokenExtraction1 =
        new CleartkExtractor(
            BaseToken.class,
            new CoveredTextExtractor(),
            //new CleartkExtractor.Covered(),
            new CleartkExtractor.LastCovered(2),
            new CleartkExtractor.Preceding(5),
            new CleartkExtractor.Following(4),
            new CleartkExtractor.Bag(new CleartkExtractor.Preceding(10)),
            new CleartkExtractor.Bag(new CleartkExtractor.Following(10))
            );
   
    CleartkExtractor posExtraction1 =
        new CleartkExtractor(
            BaseToken.class,
            new TypePathExtractor(BaseToken.class, "partOfSpeech"),
            new CleartkExtractor.LastCovered(2),
            new CleartkExtractor.Preceding(3),
            new CleartkExtractor.Following(2)
View Full Code Here

           new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhrase"),
           new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseCategory"),
           new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseAssertionFamily")
          );
   
    CleartkExtractor cuePhraseInWindowExtractor =
        new CleartkExtractor(
              AssertionCuePhraseAnnotation.class,
              baseExtractorCuePhraseCategory,
              new CleartkExtractor.Bag(new CleartkExtractor.Preceding(5)),
              new CleartkExtractor.Bag(new CleartkExtractor.Following(5))
              );
//              new CleartkExtractor.Ngram(new CleartkExtractor.Preceding(5), new CleartkExtractor.Following(5)));
              //new CoveredTextExtractor(),
//              new CleartkExtractor.Covered());
//              new CleartkExtractor.Preceding(5),
//              new CleartkExtractor.Following(5));
   
    List<CleartkExtractor> extractorList = new ArrayList<CleartkExtractor>();
    extractorList.add(cuePhraseInWindowExtractor);
   
    //NamingExtractor cuePhraseInWindowNamingExtractor = new NamingExtractor("cuePhraseCategory__", cuePhraseInWindowExtractor);

    Collection<IdentifiedAnnotation> identifiedAnnotations = JCasUtil.select(jcas,  IdentifiedAnnotation.class);
    for (IdentifiedAnnotation current : identifiedAnnotations)
    {
      if (!(current instanceof EntityMention) && !(current instanceof EventMention)) continue;
     
      // otherwise current is an entity or event mention...
      logger.info(String.format("identified annotation (event or entity) [%d-%d] \"%s\" [%s]", current.getBegin(), current.getEnd(), current.getCoveredText(), current.getClass().getName()));
     
      Collection<Sentence> coveringSentences = entityToSentenceMap.get(current);
      if (coveringSentences == null || coveringSentences.isEmpty())
      {
        logger.info("no covering sentences found!!! continuing with next entity/event...");
        continue;
      }
      logger.info(String.format("covering sentence count: %d", coveringSentences.size()));
      Sentence firstCoveringSentence = coveringSentences.iterator().next();
     
      logger.info(String.format(
          "first covering sentence: [%d-%d] \"%s\" (%s)",
          firstCoveringSentence.getBegin(), firstCoveringSentence.getEnd(),
          firstCoveringSentence.getCoveredText(),
          firstCoveringSentence.getClass().getName()));
     
      List<Feature> cuePhraseFeatures =
          //cuePhraseInSentenceExtractor.extract(jcas, firstCoveringSentence);
          cuePhraseInWindowExtractor.extractWithin(jcas, current, firstCoveringSentence);
          //cuePhraseInWindowNamingExtractor.extract(jcas, current);
      if (cuePhraseFeatures != null && !cuePhraseFeatures.isEmpty())
      {
        String featureDebugString = (cuePhraseFeatures == null) ? "(no cue phrase features)" : cuePhraseFeatures.toString();
        logger.info("### cue phrase features: " + featureDebugString);
View Full Code Here

    // a list of feature extractors that require the token and the sentence
//    this.contextFeatureExtractors = new ArrayList<CleartkExtractor>();
   
    this.tokenCleartkExtractors = new ArrayList<CleartkExtractor>();

    CleartkExtractor tokenExtraction1 =
        new CleartkExtractor(
            BaseToken.class,
//            new FeatureFunctionExtractor(new CoveredTextExtractor(), new LowerCaseFeatureFunction()),
            new CoveredTextExtractor(),
            //new CleartkExtractor.Covered(),
            new CleartkExtractor.LastCovered(2),
            new CleartkExtractor.Preceding(5),
            new CleartkExtractor.Following(4),
            new CleartkExtractor.Bag(new CleartkExtractor.Preceding(3)),
            new CleartkExtractor.Bag(new CleartkExtractor.Following(3)),
            new CleartkExtractor.Bag(new CleartkExtractor.Preceding(5)),
            new CleartkExtractor.Bag(new CleartkExtractor.Following(5)),
            new CleartkExtractor.Bag(new CleartkExtractor.Preceding(10)),
            new CleartkExtractor.Bag(new CleartkExtractor.Following(10))
            );
   
//    CleartkExtractor posExtraction1 =
//        new CleartkExtractor(
//            BaseToken.class,
//            new TypePathExtractor(BaseToken.class, "partOfSpeech"),
//            new CleartkExtractor.LastCovered(2),
//            new CleartkExtractor.Preceding(3),
//            new CleartkExtractor.Following(2)
//            );

    this.tokenCleartkExtractors.add(tokenExtraction1);
    //this.tokenCleartkExtractors.add(posExtraction1);
   
//    this.contextFeatureExtractors.add(new CleartkExtractor(IdentifiedAnnotation.class,
//        new CoveredTextExtractor(),
//        //new TypePathExtractor(IdentifiedAnnotation.class, "stem"),
//        new Preceding(2),
//        new Following(2)));
   
    // stab at dependency-based features
    //List<Feature> features = new ArrayList<Feature>();
    //ConllDependencyNode node1 = findAnnotationHead(jCas, arg1);

    CombinedExtractor baseExtractorCuePhraseCategory =
        new CombinedExtractor
          (
           new CoveredTextExtractor(),
           new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhrase"),
           new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseCategory"),
           new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseAssertionFamily")
          );
   
    cuePhraseInWindowExtractor = new CleartkExtractor(
        BaseToken.class,
        new CoveredTextExtractor(),
        new CleartkExtractor.Bag(new CleartkExtractor.Covered())
//          AssertionCuePhraseAnnotation.class,
//          baseExtractorCuePhraseCategory,
View Full Code Here

           new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhrase"),
           new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseCategory"),
           new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseAssertionFamily")
          );
   
    CleartkExtractor cuePhraseInWindowExtractor =
        new CleartkExtractor(
              AssertionCuePhraseAnnotation.class,
              baseExtractorCuePhraseCategory,
              new CleartkExtractor.Bag(new CleartkExtractor.Preceding(5)),
              new CleartkExtractor.Bag(new CleartkExtractor.Following(5))
              );
//              new CleartkExtractor.Ngram(new CleartkExtractor.Preceding(5), new CleartkExtractor.Following(5)));
              //new CoveredTextExtractor(),
//              new CleartkExtractor.Covered());
//              new CleartkExtractor.Preceding(5),
//              new CleartkExtractor.Following(5));
   
    List<CleartkExtractor> extractorList = new ArrayList<CleartkExtractor>();
    extractorList.add(cuePhraseInWindowExtractor);
   
    //NamingExtractor cuePhraseInWindowNamingExtractor = new NamingExtractor("cuePhraseCategory__", cuePhraseInWindowExtractor);

    Collection<IdentifiedAnnotation> identifiedAnnotations = JCasUtil.select(jcas,  IdentifiedAnnotation.class);
    for (IdentifiedAnnotation current : identifiedAnnotations)
    {
      if (!(current instanceof EntityMention) && !(current instanceof EventMention)) continue;
     
      // otherwise current is an entity or event mention...
      logger.info(String.format("identified annotation (event or entity) [%d-%d] \"%s\" [%s]", current.getBegin(), current.getEnd(), current.getCoveredText(), current.getClass().getName()));
     
      Collection<Sentence> coveringSentences = entityToSentenceMap.get(current);
      if (coveringSentences == null || coveringSentences.isEmpty())
      {
        logger.info("no covering sentences found!!! continuing with next entity/event...");
        continue;
      }
      logger.info(String.format("covering sentence count: %d", coveringSentences.size()));
      Sentence firstCoveringSentence = coveringSentences.iterator().next();
     
      logger.info(String.format(
          "first covering sentence: [%d-%d] \"%s\" (%s)",
          firstCoveringSentence.getBegin(), firstCoveringSentence.getEnd(),
          firstCoveringSentence.getCoveredText(),
          firstCoveringSentence.getClass().getName()));
     
      List<Feature> cuePhraseFeatures =
          //cuePhraseInSentenceExtractor.extract(jcas, firstCoveringSentence);
          cuePhraseInWindowExtractor.extractWithin(jcas, current, firstCoveringSentence);
          //cuePhraseInWindowNamingExtractor.extract(jcas, current);
      if (cuePhraseFeatures != null && !cuePhraseFeatures.isEmpty())
      {
        String featureDebugString = (cuePhraseFeatures == null) ? "(no cue phrase features)" : cuePhraseFeatures.toString();
        logger.info("### cue phrase features: " + featureDebugString);
View Full Code Here

           new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhrase"),
           new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseCategory"),
           new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseAssertionFamily")
          );
   
    CleartkExtractor cuePhraseInWindowExtractor =
        new CleartkExtractor(
              AssertionCuePhraseAnnotation.class,
              baseExtractorCuePhraseCategory,
              new CleartkExtractor.Bag(new CleartkExtractor.Preceding(5)),
              new CleartkExtractor.Bag(new CleartkExtractor.Following(5))
              );
//              new CleartkExtractor.Ngram(new CleartkExtractor.Preceding(5), new CleartkExtractor.Following(5)));
              //new CoveredTextExtractor(),
//              new CleartkExtractor.Covered());
//              new CleartkExtractor.Preceding(5),
//              new CleartkExtractor.Following(5));
   
    CleartkExtractor tokenExtraction1 =
        new CleartkExtractor(
            BaseToken.class,
            new CoveredTextExtractor(),
            //new CleartkExtractor.Covered(),
            new CleartkExtractor.LastCovered(2),
            new CleartkExtractor.Preceding(5),
            new CleartkExtractor.Following(4),
            new CleartkExtractor.Bag(new CleartkExtractor.Preceding(10)),
            new CleartkExtractor.Bag(new CleartkExtractor.Following(10))
            );
   
    CleartkExtractor posExtraction1 =
        new CleartkExtractor(
            BaseToken.class,
            new TypePathExtractor(BaseToken.class, "partOfSpeech"),
            new CleartkExtractor.LastCovered(2),
            new CleartkExtractor.Preceding(3),
            new CleartkExtractor.Following(2)
View Full Code Here

TOP

Related Classes of org.cleartk.classifier.feature.extractor.CleartkExtractor

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.