Package types

Examples of types.Alphabet


    // extract tokens from text
    ArrayList<Annotation> tokens = new ArrayList(inputAS.get(elementType));
    OffsetComparator oc = new OffsetComparator();
    Collections.sort(tokens, oc);
    ArrayList<Element<Object>> elements = new ArrayList<Element<Object>>();
    Alphabet labelAlphabet = tagger.getYAlphabet();
    SparseVector[] x;
    Object[] y;
    int[] labels;
    ElementSequence<Element<Object>> sequence;
    Element<Object> element;
    String chunk, label;

    for (Annotation token : tokens) {

      // extract the chunk of the token
      chunk = document.getContent().getContent(
          token.getStartNode().getOffset(),
          token.getEndNode().getOffset()).toString();
      label = "?";

      // create an element with the chunk and label
      element = new Element<Object>(chunk, label, token);
      elements.add(element);
    }

    x = new SparseVector[elements.size()];
    y = new Object[elements.size()];

    // fill in the labels
    for (int i = 0; i < y.length; i++)
      y[i] = "?";

    // create an element sequence and extract the features
    sequence = new ElementSequence(elements, xAlphabet, yAlphabet, x, y,
        document, inputAS);

    // extract the labels for the tokens in the sentence
    labels = tagger.label(extractor.process(sequence).x);
    FeatureMap features = Factory.newFeatureMap();
    long start = tokens.get(0).getStartNode().getOffset();
    long end;
   
    if (labels.length == tokens.size()) {

      for (int i = 1; i < tokens.size(); i++) {

        if (labels[i] == labelAlphabet.lookupObject("B")) {

          end = tokens.get(i).getStartNode().getOffset();
          outputAS.add(start, end, labelType, features);
          start = end;
        }
View Full Code Here

TOP

Related Classes of types.Alphabet

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.