Package org.cleartk.classifier

Examples of org.cleartk.classifier.Feature


      for (BaseToken token : tokens) {
        ++outcomeIndex;

        // extract token features
        List<Feature> features = new ArrayList<Feature>();
        features.add(new Feature(token.getCoveredText()));
        features.add(new Feature("PartOfSpeech", token.getPartOfSpeech()));

        // extract previous classification features
        for (int i = this.nPreviousClassifications; i > 0; --i) {
          int index = outcomeIndex - i;
          String previousOutcome = index < 0 ? "O" : outcomes.get(index);
          features.add(new Feature("PreviousOutcome_" + i, previousOutcome));
        }

        // extract length of Modifier that is currently being created (if any)
        // int length = 0;
        // for (int i = outcomeIndex - 1; i > 0 && !"O".equals(outcomes.get(i)); --i) {
View Full Code Here


       
    // Extract features between
    List<TerminalTreebankNode> headList = this.extractPhraseHeadByTreenode(jCas, JCasUtil.selectCovered(jCas, TreebankNode.class, arg1.getEnd(), arg2.getBegin()));

    if(headList.size() > 0) {
      features.add(new Feature("PhraseChunk_Between_FirstHead", headList.get(0).getNodeValue()));
      features.add(new Feature("PhraseChunk_Between_LastHead", headList.get(headList.size()-1).getNodeValue()));
     
      if(headList.size() >= 2) {
        String inBetweenValue = "";
        for(int i=1;i<headList.size()-1;i++) {
          if(i>1)
            inBetweenValue += "_";
          inBetweenValue += headList.get(i).getNodeValue();
        }
        features.add(new Feature("PhraseChunk_Between_BetweenHeads", inBetweenValue));
      }
    }
   
    // Extract feature before M1
    headList = this.extractPhraseHeadByTreenode(jCas, JCasUtil.selectPreceding(jCas, TreebankNode.class, arg1, 20));

    boolean isFirst = false;
    for(int i=headList.size()-1;i>=0;i--) {
      if(headList.get(i).getEnd() < arg1.getBegin()) {
        if(!isFirst) {
        features.add(new Feature("PhraseChunk_Before_FirstHead", headList.get(i).getNodeValue()));
        isFirst = true;
        }
        else {
          features.add(new Feature("PhraseChunk_Before_SecondHead", headList.get(i).getNodeValue()));
          break;
        }
      }
    }
   
    // Extract feature after M2
    headList = this.extractPhraseHeadByTreenode(jCas, JCasUtil.selectFollowing(jCas, TreebankNode.class, arg2, 20));
   
   
    isFirst = false;
    for(int i=0;i<headList.size();i++) {
      if(headList.get(i).getBegin() > arg2.getEnd() ) {
        if(!isFirst) {
          features.add(new Feature("PhraseChunk_After_FirstHead", headList.get(i).getNodeValue()));
          isFirst = true;
        }
        else {
          features.add(new Feature("PhraseChunk_After_SecondHead", headList.get(i).getNodeValue()));
          break;
        }
      }
    }
   
View Full Code Here

        // features from previous classifications
        int nPreviousClassifications = 2;
        for (int i = nPreviousClassifications; i > 0; --i) {
          int index = tokenIndex - i;
          String previousOutcome = index < 0 ? "O" : outcomes.get(index);
          features.add(new Feature("PreviousOutcome_" + i, previousOutcome));
        }
        //add segment ID as a features:
        features.add(new Feature("SegmentID", segment.getId()));

        // features from dominating parse tree
        //        for(SimpleFeatureExtractor extractor : this.parseFeatureExtractors){
        BaseToken startToken = token;
        for(int i = tokenIndex-1; i >= 0; --i){
View Full Code Here

   
    List<EventMention> events = JCasUtil.selectCovered(jCas, EventMention.class, begin, end);
    List<TimeMention> times   = JCasUtil.selectCovered(jCas, TimeMention.class, begin, end);
    eventsInBetween = events==null? 0: events.size();
    timesInBetween  = times==null? 0: times.size();
    feats.add(new Feature("NumOfEvents_InBetween", eventsInBetween));
    feats.add(new Feature("NumOfTimes_InBetween", timesInBetween));
    feats.add(new Feature("NumOfEventsAndTimes_InBetween", timesInBetween+eventsInBetween));
   
//    //print long distances
//    if (eventsInBetween >= 200){
//      System.out.println("++++++++++Long Distance Relation in "+ ViewURIUtil.getURI(jCas).toString() + "+++++++");
//      System.out.println("["+arg1.getCoveredText()+"] "+ jCas.getDocumentText().substring(arg1.getEnd(), arg2.getBegin()) +" ["+arg2.getCoveredText()+"]");
View Full Code Here

      {
    List<Feature> feats = new ArrayList<Feature>();
   
    TreebankNode domNode = AnnotationTreeUtils.annotationNode(jcas, begin, end);
    if(domNode != null){
      feats.add(new Feature("DominatingTreeCat", domNode.getNodeType()));
      if(domNode.getNodeTags() != null){
        for(int ind = 0; ind < domNode.getNodeTags().size(); ind++){
          String tag = domNode.getNodeTags(ind);
          if(tag.equals("TMP")){
            feats.add(new Feature("DominatingTmpTag", tag));
          }
        }
      }
      TreebankNode parent = domNode.getParent();
      if(parent != null){
        feats.add(new Feature("DominatingTreeParent", parent.getNodeType()));
        do{
          if(parent.getNodeTags() != null){
            for(int ind = 0; ind < parent.getNodeTags().size(); ind++){
              String tag = parent.getNodeTags(ind);
//              if(tag.equals("TMP")){
                feats.add(new Feature("DominatingAncestorTmpTag", tag));
//              }
            }
          }
          parent = parent.getParent();
        }while(parent != null);
      }
     
      if(domNode.getLeaf()){
        feats.add(new Feature("DominatingIsLeaf"));
      }else{
        StringBuffer buffer = new StringBuffer();
        for(int i = 0; i < domNode.getChildren().size(); i++){
          buffer.append(domNode.getChildren(i).getNodeType());
          buffer.append("_");
          feats.add(new Feature("DominatingChildBag" + domNode.getChildren(i).getNodeType()));
        }
        feats.add(new Feature("DominatingProduction", buffer.toString()));
      }
      if(domNode.getBegin() == begin && domNode.getEnd() == end){
        feats.add(new Feature("DominatingExactMatch"));
      }
    }
    return feats;
  }
View Full Code Here

  }

  public List<Feature> extract(BaseToken token) {
    List<Feature> features = Lists.newArrayList();
    Collection<Predicate> predicates = this.tokenPredicateMap.get(token);
    features.add(new Feature("Predicate", !predicates.isEmpty()));
    for (Predicate predicate : predicates) {
      features.add(new Feature("Predicate_Lex", predicate.getCoveredText()));
    }
    for (SemanticArgument argument : this.tokenArgumentMap.get(token)) {
      SemanticRoleRelation relation = argument.getRelation();
      String category = relation.getCategory();
      features.add(new Feature("Argument", category));
      String predicateText = relation.getPredicate().getCoveredText();
      features.add(new Feature("Argument_Lex", String.format("%s_%s", category, predicateText)));
    }
    return features;
  }
View Full Code Here

    {
      return features;
    }

    LinkedList<ConllDependencyNode> node1ToNode2Path = DependencyParseUtils.getPathBetweenNodes(node1, node2);
    features.add(new Feature("dependency_path", DependencyParseUtils.pathToString(node1ToNode2Path)));
    features.add(new Feature("dependency_path_length", node1ToNode2Path.size()));//add path length as a feature

    return features;
  }
View Full Code Here

   
    //2 get DateX
    if (sentList != null && !sentList.isEmpty()){
      for(Sentence sent : sentList) {
        for (@SuppressWarnings("unused") DateAnnotation date : JCasUtil.selectCovered(view, DateAnnotation.class, sent)) {
          Feature indicator = new Feature("DateXNearby", this.name);
          features.add(indicator);
          break;
        }
      }
    }
   
  //3 get Measurement
    if (sentList != null && !sentList.isEmpty()){
      for(Sentence sent : sentList) {
        for (@SuppressWarnings("unused") MeasurementAnnotation date : JCasUtil.selectCovered(view, MeasurementAnnotation.class, sent)) {
          Feature indicator = new Feature("MeasurementNearby", "measure");
          features.add(indicator);
          break;
        }
      }
    }
   
  //4 get number
    if (sentList != null && !sentList.isEmpty()){
      for(Sentence sent : sentList) {
        for (@SuppressWarnings("unused") NumToken date : JCasUtil.selectCovered(view, NumToken.class, sent)) {
          Feature indicator = new Feature("NumTokenNearby", "NumToken");
          features.add(indicator);
          break;
        }
      }
    }
View Full Code Here

              verbDistMap.put(Math.abs(wt.getBegin() - annotation.getBegin()), wt);
            }
          }
        }
        for (Map.Entry<Integer, WordToken> entry : verbDistMap.entrySet()) {
          Feature feature = new Feature(this.name+"_token", entry.getValue().getCoveredText());
          features.add(feature);
          //logger.info("found nearby closest verb: "+ entry.getValue().getCoveredText() + " POS:" + entry.getValue().getPartOfSpeech());
          Feature posfeature = new Feature(this.name, entry.getValue().getPartOfSpeech());
          features.add(posfeature);
          break;             
        }
      }
     
View Full Code Here

     
      // print out totals:
      for(String typeId : typeCounts.keySet()){
        String featName = "eventTypeID_"+typeId;
        eventTypes.add(featName);
        features.add(new Feature(featName, typeCounts.get(typeId)));       
      }
     
      // TO print out just the types without counts:
//      for(String typeId : typeCounts.keySet()){
//        features.add(new Feature("arg1EntityTypeID_", typeId));
View Full Code Here

TOP

Related Classes of org.cleartk.classifier.Feature

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.