Package opennlp.tools.util

Examples of opennlp.tools.util.StringList


  private Dictionary getDict() {
    return new Dictionary(false);
  }
 
  private StringList asSL(String str) {
    return new StringList(str);
  }
View Full Code Here


    POSSample sample;
    while((sample = samples.read()) != null) {
      String[] words = sample.getSentence();
     
      if (words.length > 0)
        ngramModel.add(new StringList(words), 1, 1);
    }
   
    ngramModel.cutoff(cutoff, Integer.MAX_VALUE);
   
    return ngramModel.toDictionary(true);
View Full Code Here

    List<String> e = new ArrayList<String>();
    e.add("default");
    // add the word itself
    e.add("w=" + lex);
    dictGram[0] = lex;
    if (dict == null || !dict.contains(new StringList(dictGram))) {
      // do some basic suffix analysis
      String[] suffs = getSuffixes(lex);
      for (int i = 0; i < suffs.length; i++) {
        e.add("suf=" + suffs[i]);
      }
View Full Code Here

        String word = iterator.next();

        Attributes tagAttribute = new Attributes();
        tagAttribute.setValue("tags", tagsToString(getTags(word)));

        return new Entry(new StringList(word), tagAttribute);
      }

      public void remove() {
        throw new UnsupportedOperationException();
      }
View Full Code Here

        String tagString = entry.getAttributes().getValue("tags");

        String[] tags = tagString.split(" ");

        StringList word = entry.getTokens();

        if (word.size() != 1)
          throw new InvalidFormatException("Each entry must have exactly one token! "+word);

        newPosDict.dictionary.put(word.getToken(0), tags);
      }});

    newPosDict.caseSensitive = isCaseSensitive;
   
    // TODO: The dictionary API needs to be improved to do this better!
View Full Code Here

      for (int wi=0;wi<words.length;wi++) {
        words[wi] =
            tt[wi].substring(0,tt[wi].lastIndexOf('_'));
      }

      ngramModel.add(new StringList(words), 1, 1);
    }

    System.out.println("Saving the dictionary");

    ngramModel.cutoff(cutoff, Integer.MAX_VALUE);
View Full Code Here

              "", entry.getAttributes().getValue(key));
    }

    hd.startElement("", "", ENTRY_ELEMENT, entryAttributes);

    StringList tokens = entry.getTokens();

    for (Iterator<String> it = tokens.iterator(); it.hasNext(); ) {

      hd.startElement("", "", TOKEN_ELEMENT, new AttributesImpl());

      String token = it.next();
View Full Code Here

       else if (ENTRY_ELEMENT.equals(localName)) {

         String[] tokens = mTokenList.toArray(
             new String[mTokenList.size()]);

         Entry entry = new Entry(new StringList(tokens), mAttributes);

         try {
           mInserter.insert(entry);
         } catch (InvalidFormatException e) {
           throw new SAXException("Invalid dictionary format!", e);
View Full Code Here

  private static Dictionary readNames(String nameFile) throws IOException {
    Dictionary names = new Dictionary();

    BufferedReader nameReader = new BufferedReader(new FileReader(nameFile));
    for (String line = nameReader.readLine(); line != null; line = nameReader.readLine()) {
      names.put(new StringList(line));
    }

    return names;
  }
View Full Code Here

    if (dict != null) {

      if (p_2 != null) {
        unigram[0] = p_2.getHead().toString();
        u_2 = dict.contains(new StringList(unigram));
      }

      if (p2 != null) {
        unigram[0] = p2.getHead().toString();
        u2 = dict.contains(new StringList(unigram));
      }

      unigram[0] = p0.getHead().toString();
      u0 = dict.contains(new StringList(unigram));

      if (p_2 != null && p_1 != null) {
        bigram[0] = p_2.getHead().toString();
        bigram[1] = p_1.getHead().toString();
        b_2_1 = dict.contains(new StringList(bigram));

        trigram[0] = p_2.getHead().toString();
        trigram[1] = p_1.getHead().toString();
        trigram[2] = p0.getHead().toString();
        t_2_10 = dict.contains(new StringList(trigram));
      }
      if (p_1 != null && p1 != null) {
        trigram[0] = p_1.getHead().toString();
        trigram[1] = p0.getHead().toString();
        trigram[2] = p1.getHead().toString();
        t_101 = dict.contains(new StringList(trigram));
      }
      if (p_1 != null) {
        unigram[0] = p_1.getHead().toString();
        u_1 = dict.contains(new StringList(unigram));

        //extra check for 2==null case
        b_2_1 = b_2_1 && u_1 & u_2;
        t_2_10 = t_2_10 && u_1 & u_2 & u0;
        t_101 = t_101 && u_1 & u0 && u1;

        bigram[0] = p_1.getHead().toString();
        bigram[1] = p0.getHead().toString();
        b_10 = dict.contains(new StringList(bigram)) && u_1 && u0;
      }
      if (p1 != null && p2 != null) {
        bigram[0] = p1.getHead().toString();
        bigram[1] = p2.getHead().toString();
        b12 = dict.contains(new StringList(bigram));

        trigram[0] = p0.getHead().toString();
        trigram[1] = p1.getHead().toString();
        trigram[2] = p2.getHead().toString();
        t012 = dict.contains(new StringList(trigram));
      }
      if (p1 != null) {
        unigram[0] = p1.getHead().toString();
        u1 = dict.contains(new StringList(unigram));

        //extra check for 2==null case
        b12 = b12 && u1 && u2;
        t012 = t012 && u1 && u2 && u0;
        t_101 = t_101 && u0 && u_1 && u1;

        bigram[0] = p0.getHead().toString();
        bigram[1] = p1.getHead().toString();
        b01 = dict.contains(new StringList(bigram));
        b01 = b01 && u0 && u1;
      }
    }

    String consp_2 = cons(p_2, -2);
View Full Code Here

TOP

Related Classes of opennlp.tools.util.StringList

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.