Package org.cleartk.classifier

Examples of org.cleartk.classifier.Feature


      tree = TreeExtractor.getSimpleClone(t2);
    }else{
      tree = TreeExtractor.extractPathEnclosedTree(t1, t2, jcas);
    }

    features.add(new Feature("TK_PET", tree.toString()));
    return features;
  }
View Full Code Here


    ConllDependencyNode headNode = DependencyUtility.getNominalHeadNode(jcas, focusAnnotation);
    try {
      boolean[] regexFeats = conAnal.findNegationContext(nodes, headNode);
      for(int j = 0; j < regexFeats.length; j++){
        if(regexFeats[j]){
          feats.add(new Feature("DepPath_" + conAnal.getRegexName(j))); //"NEG_DEP_REGEX_"+j));
        }
      }
    } catch (Exception e) {
      e.printStackTrace();
      throw new CleartkExtractorException(e);
View Full Code Here

//      for (String domain : domainIds) {
//        String featureName = Feature.createName(domain, DOMAIN_ADAPTATION_ALGORITHM, feature.getName());
      String featureName = Feature.createName(currentDomain, DOMAIN_ADAPTATION_ALGORITHM, feature.getName());
     
      fedaFeatures.add(
          new Feature(
              featureName,
              featureValue.toString() )
          );
//      }
      return fedaFeatures;
View Full Code Here

      if (!entity.getClass().equals(EventMention.class)) {
        List<BaseToken> tokens = JCasUtil.selectCovered(jCas, BaseToken.class, entity);
        if (tokens.size() > 0){
          BaseToken lastToken = tokens.get(tokens.size() - 1);
            String value = String.format("%s_%s", entity.getClass().getSimpleName(), entity.getTypeID());
            endOfEntityFeatures.put(lastToken, new Feature("EndOf", value));
        }      
      }
    }

    Random rand = new Random();
   
    //TRY SMOTE algorithm here to generate more minority class samples
    SMOTEplus smote = new SMOTEplus((int)Math.ceil(this.smoteNumOfNeighbors));
       
    // classify tokens within each sentence
    for (Sentence sentence : JCasUtil.selectCovered(jCas, Sentence.class, segment)) {
      List<BaseToken> tokens = JCasUtil.selectCovered(jCas, BaseToken.class, sentence);

      // during training, the list of all outcomes for the tokens
      List<String> outcomes;
      if (this.isTraining()) {
        List<EventMention> events = Lists.newArrayList();
        for (EventMention event : JCasUtil.selectCovered(jCas, EventMention.class, sentence)) {
          if (event.getClass().equals(EventMention.class)) {
            events.add(event);
          }
        }
        outcomes = this.eventChunking.createOutcomes(jCas, tokens, events);
      }
      // during prediction, the list of outcomes predicted so far
      else {
        outcomes = new ArrayList<String>();
      }

      // get BIO entity tags for each entity type
      int[] entityTypeIDs = new int[] {
          CONST.NE_TYPE_ID_ANATOMICAL_SITE,
          CONST.NE_TYPE_ID_DISORDER,
          CONST.NE_TYPE_ID_DRUG,
          CONST.NE_TYPE_ID_FINDING,
          CONST.NE_TYPE_ID_PROCEDURE,
          CONST.NE_TYPE_ID_UNKNOWN };
      List<IdentifiedAnnotation> entities;
      if (this.isTraining()) {
        entities = Lists.newArrayList();
        for (IdentifiedAnnotation entity : JCasUtil.selectCovered(jCas, IdentifiedAnnotation.class, sentence)) {
          if (!entity.getClass().equals(EventMention.class)) {
            entities.add(entity);
          }
        }
      } else {
        entities = JCasUtil.selectCovered(jCas, IdentifiedAnnotation.class, sentence);
      }
     
      List<ChunkingExtractor> chunkingExtractors = Lists.newArrayList();
      for (int typeID : entityTypeIDs) {
        Predicate<IdentifiedAnnotation> hasTypeID = hasEntityType(typeID);
        List<IdentifiedAnnotation> subEntities = Lists.newArrayList(Iterables.filter(entities, hasTypeID));
        chunkingExtractors.add(new ChunkingExtractor("EntityTag", this.entityChunking, jCas, tokens, subEntities));
      }
     
      // add extractor for phrase chunks
      List<Chunk> chunks = JCasUtil.selectCovered(jCas, Chunk.class, sentence);
      chunkingExtractors.add(new ChunkingExtractor("PhraseTag", this.phraseChunking, jCas, tokens, chunks));

      // extract features for all tokens
      int tokenIndex = -1;
      int nChunkLabelsBefore = 2;
      int nChunkLabelsAfter = 2;
      int nPreviousClassifications = 2;

      for (BaseToken token : tokens) {
        ++tokenIndex;

        List<Feature> features = new ArrayList<Feature>();

        // features from previous classifications
        for (int i = nPreviousClassifications; i > 0; --i) {
          int index = tokenIndex - i;
          String previousOutcome = index < 0 ? "O" : outcomes.get(index);
          features.add(new Feature("PreviousOutcome_" + i, previousOutcome));
        }
       
        // features from token attributes
        features.addAll(this.tokenFeatureExtractor.extract(jCas, token));
View Full Code Here

    parentScore = 0.0;
    ArrayList<Feature> features = new ArrayList<Feature>();
    String category = NON_MENTION;

    // node-based features
    if(node.getParent().getParent() == null) features.add(new Feature("IS_ROOT"));
    features.add(new Feature("NODE_LABEL", node.getNodeType()));
    features.add(new Feature("PARENT_LABEL", node.getParent().getNodeType()));
    List<BaseToken> coveredTokens = JCasUtil.selectCovered(BaseToken.class, node);
   
    //check span length, check if a small node contains any time word
    int numTokens = coveredTokens.size();
   
    if(node.getLeaf()){
      features.add(new Feature("IS_LEAF"));
      features.addAll(wordTypeExtractor.extract(jCas, node));
    }else{
      StringBuilder buffer = new StringBuilder();
      for(int i = 0; i < node.getChildren().size(); i++){
        buffer.append(node.getChildren(i).getNodeType());
        buffer.append("_");
        features.add(new Feature("CHILD_BAG", node.getChildren(i).getNodeType()));
      }
//      features.add(new Feature("NUM_TOKENS", JCasUtil.selectCovered(BaseToken.class, node).size()));
      features.add(new Feature("PRODUCTION", buffer.toString()));
//      features.add(new Feature("LeftSibling", getSiblingCategory(node, -1)));
//      features.add(new Feature("RightSibling", getSiblingCategory(node, 1)));
    }
   
    // other feature types:
View Full Code Here

            closest = tokens.size();
          }
//          instance.addAll(cuePhraseInWindowExtractor.extractBetween(jCas, cue, entityOrEventMention));
        }
        if(closestCue != null && closest < 21){
          instance.add(new Feature("ClosestCue_Word", closestCue.getCoveredText()));
//          instance.add(new Feature("ClosestCue_Phrase", closestCue.getCuePhrase()));
          instance.add(new Feature("ClosestCue_PhraseFamily", closestCue.getCuePhraseAssertionFamily()));
          instance.add(new Feature("ClosestCue_PhraseCategory", closestCue.getCuePhraseCategory()));
         
          // add hack-ey domain adaptation to these hacked-in features
          if (!fileToDomain.isEmpty() && ffDomainAdaptor!=null) {
            instance.addAll(ffDomainAdaptor.apply(new Feature("ClosestCue_Word", closestCue.getCoveredText())));
            instance.addAll(ffDomainAdaptor.apply(new Feature("ClosestCue_PhraseFamily", closestCue.getCuePhraseAssertionFamily())));
              instance.addAll(ffDomainAdaptor.apply(new Feature("ClosestCue_PhraseCategory", closestCue.getCuePhraseCategory())));
          }
         
        }
      }
//      if (cuePhraseFeatures != null && !cuePhraseFeatures.isEmpty())
//      {
//        instance.addAll(cuePhraseFeatures);
//      }


      // 7/9/13 SRH trying to make it work just for anatomical site
      int eemTypeId = entityOrEventMention.getTypeID();
      if (eemTypeId == CONST.NE_TYPE_ID_ANATOMICAL_SITE) {
          // 7/9/13 srh modified per tmiller so it's binary but not numeric feature
          //instance.add(new Feature("ENTITY_TYPE_" + entityOrEventMention.getTypeID()));
          instance.add(new Feature("ENTITY_TYPE_ANAT_SITE"));
          // add hack-ey domain adaptation to these hacked-in features
          if (!fileToDomain.isEmpty() && ffDomainAdaptor!=null) {
            instance.addAll(ffDomainAdaptor.apply(new Feature("ENTITY_TYPE_ANAT_SITE")));
          }
      }
      /* This hurts recall more than it helps precision
      else if (eemTypeId == CONST.NE_TYPE_ID_DRUG) {
        // 7/10 adding drug
View Full Code Here

   
    // Pull in general dependency-based features -- externalize to another extractor?
      ConllDependencyNode node = DependencyUtility.getNominalHeadNode(jCas, arg);
      if (node!= null) {
//        features.add(new Feature("DEPENDENCY_HEAD", node));
        features.add(new Feature("DEPENDENCY_HEAD_word", node.getCoveredText()));
//        features.add(new Feature("DEPENDENCY_HEAD_pos", node.getPostag()));
        features.add(new Feature("DEPENDENCY_HEAD_deprel", node.getDeprel()));
//        features.add(new Feature("DEPENDENCY_HEAD_lemma", node.getLemma()));
    }
     
      HashMap<String, Boolean> featsMap = HistoryAttributeClassifier.extract(jCas, arg);

      // Pull in all the features that were used for the rule-based module
      features.addAll( hashToFeatureList(featsMap) );
     
      // Pull in the result of the rule-based module as well
      features.add(new Feature("HISTORY_CLASSIFIER_LOGIC", HistoryAttributeClassifier.classifyWithLogic(featsMap)));
     
      // Add whether it is token preceded by "h/o"
      //features.add(new Feature("PRECEDED_BY_H_SLASH_O", HistoryAttributeClassifier.precededByH_O(jCas, arg)));
     
      return features;
View Full Code Here

  private Collection<? extends Feature> hashToFeatureList(
      HashMap<String, Boolean> featsIn) {
   
    Collection<Feature> featsOut = new HashSet<Feature>();
    for (String featName : featsIn.keySet()) {
      featsOut.add(new Feature(featName,featsIn.get(featName)));
    }
   
    return featsOut;
  }
View Full Code Here

  public List<Feature> extract(JCas view, Annotation focusAnnotation)
      throws CleartkExtractorException {
    List<Feature> featList = new ArrayList<Feature>();
   
    IdentifiedAnnotation mention = (IdentifiedAnnotation) focusAnnotation;
    featList.add(new Feature("IsNegated", mention.getPolarity() == CONST.NE_POLARITY_NEGATION_PRESENT));
    return featList;
  }
View Full Code Here

    List<Feature> features = new ArrayList<Feature>();
   
    // Pull in general dependency-based features -- externalize to another extractor?
      ConllDependencyNode node = DependencyUtility.getNominalHeadNode(jCas, arg);
      if (node!= null) {
        features.add(new Feature("DEPENDENCY_HEAD", node.getCoveredText()));
        features.add(new Feature("DEPENDENCY_HEAD_deprel", node.getDeprel()));
    }
     
      HashMap<String, Boolean> featsMap = GenericAttributeClassifier.extract(jCas, arg);

      // Pull in all the features that were used for the rule-based module
      features.addAll( hashToFeatureList(featsMap) );
      // Pull in the result of the rule-based module as well
      features.add(new Feature("GENERIC_CLASSIFIER_LOGIC", GenericAttributeClassifier.classifyWithLogic(featsMap)));

     
      return features;
  }
View Full Code Here

TOP

Related Classes of org.cleartk.classifier.Feature

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.