Package org.apache.uima

Examples of org.apache.uima.TokenAnnotation


      // iterate over all token annotations and add stem if available
      FSIterator<Annotation> tokenIterator = aJCas.getAnnotationIndex(TokenAnnotation.type).iterator();
      while (tokenIterator.hasNext()) {
        // get token content
        TokenAnnotation annot = (TokenAnnotation) tokenIterator.next();
        String span = annot.getCoveredText();

        // set annotation content and call stemmer
        try {
          stemmer.setCurrent(span);
          stemmerStemMethod.invoke(stemmer, emptyArgs);
        } catch (Exception ex) {
          throw new AnalysisEngineProcessException(ex);
        }

        // get stemmer result and set annotation feature
        annot.setStem(stemmer.getCurrent());
      }
    } else {
      if (language.equals("x")) {
        this.logger.log(Level.WARNING, "Language of the CAS is set to 'x', SnowballAnnotator skipped processing.");
      }
View Full Code Here


      tokenList.clear();
      wordList.clear();

      FSIterator tokenIterator = tokenIndex.subiterator(sentence);
      while (tokenIterator.hasNext()) {
        TokenAnnotation token = (TokenAnnotation) tokenIterator.next();

        tokenList.add(token);
        wordList.add(token.getCoveredText());
      }

      List<String> wordTagList = Viterbi.process(this.N, wordList, this.my_model.suffix_tree, this.my_model.suffix_tree_capitalized, this.my_model.transition_probs,
              this.my_model.word_probs, this.my_model.lambdas2, this.my_model.lambdas3, this.my_model.theta);

    
      try {
        for (int i = 0; i < tokenList.size(); i++) {
          TokenAnnotation token = tokenList.get(i);
          String posTag = wordTagList.get(i);
          token.setPosTag(posTag);
        }
      } catch (IndexOutOfBoundsException e) {
        System.err.println("POS tagger error - list of tags shorter than list of words");
      }
    }
View Full Code Here

      tokenList.clear();
      wordList.clear();

      FSIterator tokenIterator = tokenIndex.subiterator(sentence);
      while (tokenIterator.hasNext()) {
        TokenAnnotation token = (TokenAnnotation) tokenIterator.next();

        tokenList.add(token);
        wordList.add(token.getCoveredText());
      }

      List<String> wordTagList = Viterbi.process(this.N, wordList, this.my_model.suffix_tree, this.my_model.suffix_tree_capitalized, this.my_model.transition_probs,
              this.my_model.word_probs, this.my_model.lambdas2, this.my_model.lambdas3, this.my_model.theta);


      try {
        for (int i = 0; i < tokenList.size(); i++) {
          Annotation token = tokenList.get(i);

          String posTag = wordTagList.get(i);
          Feature featPOS = getType(workingView, this.theTokenTypeName).getFeatureByBaseName(thePOSAttribute);
          token.setFeatureValueFromString(featPOS, posTag);

        }
      } catch (IndexOutOfBoundsException e) {
        System.err.println("POS tagger error - list of tags shorter than list of words");
      }
View Full Code Here

      tokenList.clear();
      wordList.clear();

      FSIterator tokenIterator = tokenIndex.subiterator(sentence);
      while (tokenIterator.hasNext()) {
        TokenAnnotation token = (TokenAnnotation) tokenIterator.next();

        tokenList.add(token);
        wordList.add(token.getCoveredText());
      }

      List<String> wordTagList = Viterbi.process(this.N, wordList, this.my_model.suffix_tree, this.my_model.suffix_tree_capitalized, this.my_model.transition_probs,
              this.my_model.word_probs, this.my_model.lambdas2, this.my_model.lambdas3, this.my_model.theta);

    
      try {
        for (int i = 0; i < tokenList.size(); i++) {
          TokenAnnotation token = tokenList.get(i);
          String posTag = wordTagList.get(i);
          token.setPosTag(posTag);
        }
      } catch (IndexOutOfBoundsException e) {
        System.err.println("POS tagger error - list of tags shorter than list of words");
      }
    }
View Full Code Here

  private List<String> getCurrentTagList(JCas cas) {
    List<String> tagList = new ArrayList<String>();
    AnnotationIndex tokenIndex = cas.getAnnotationIndex(TokenAnnotation.type);
    FSIterator tokIt = tokenIndex.iterator();
    TokenAnnotation token = null;
    for (tokIt.moveToFirst(); tokIt.isValid(); tokIt.moveToNext()) {
      token = (TokenAnnotation) tokIt.get();
      tagList.add(token.getPosTag());
    }
    return tagList;
  }
View Full Code Here

  private void printPosTags(JCas cas) throws UnsupportedEncodingException, IOException {
    Writer writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(
        "moby-tag-list.txt"), "utf-8"));
    AnnotationIndex tokenIndex = cas.getAnnotationIndex(TokenAnnotation.type);
    FSIterator tokIt = tokenIndex.iterator();
    TokenAnnotation token = null;
    for (tokIt.moveToFirst(); tokIt.isValid(); tokIt.moveToNext()) {
      token = (TokenAnnotation) tokIt.get();
      writer.write(token.getPosTag());
      writer.write('\n');
    }
    writer.close();
  }
View Full Code Here

TOP

Related Classes of org.apache.uima.TokenAnnotation

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.