Examples of IteratorTokenStream


Examples of org.allspice.parser.test.IteratorTokenStream

   * @throws InvalidStartRule
   * @throws StateConflict
   * @throws StrandedSymbol
   * @throws SyntaxError
   */
  public static void main(String[] args) throws StateConflict, InvalidStartRule, StrandedSymbol, SyntaxError {
    final IteratorTokenStream iterator = new IteratorTokenStream(new Token("id",3.0),new Token("*"),new Token("id",5.0)) ;
    Parser parser = new Parser(createParseTable(),iterator) ;
    Object res = parser.parse() ;
    System.out.println("res="+res) ;
  }
View Full Code Here

Examples of org.apache.mahout.common.lucene.IteratorTokenStream

    final String label = key.toString();
    String[] tokens = SPACE_TAB.split(value.toString());
    OpenObjectIntHashMap<String> wordList = new OpenObjectIntHashMap<String>(tokens.length * gramSize);
   
    if (gramSize > 1) {
      ShingleFilter sf = new ShingleFilter(new IteratorTokenStream(Iterators.forArray(tokens)), gramSize);
      do {
        String term = sf.getAttribute(CharTermAttribute.class).toString();
        if (!term.isEmpty()) {
          if (wordList.containsKey(term)) {
            wordList.put(term, 1 + wordList.get(term));
View Full Code Here

Examples of org.apache.mahout.common.lucene.IteratorTokenStream

    StringTuple value = it.next();

    Vector vector = new RandomAccessSparseVector(dimension, value.length()); // guess at initial size

    if (maxNGramSize >= 2) {
      ShingleFilter sf = new ShingleFilter(new IteratorTokenStream(value.getEntries().iterator()), maxNGramSize);
      sf.reset();
      try {
        do {
          String term = sf.getAttribute(CharTermAttribute.class).toString();
          if (!term.isEmpty() && dictionary.containsKey(term)) { // ngram
View Full Code Here

Examples of org.apache.mahout.common.lucene.IteratorTokenStream

   * @throws IOException if there's a problem with the ShingleFilter reading data or the collector collecting output.
   */
  @Override
  protected void map(Text key, StringTuple value, final Context context) throws IOException, InterruptedException {

    ShingleFilter sf = new ShingleFilter(new IteratorTokenStream(value.getEntries().iterator()), maxShingleSize);
    sf.reset();
    try {
      int count = 0; // ngram count

      OpenObjectIntHashMap<String> ngrams =
View Full Code Here

Examples of org.apache.mahout.common.lucene.IteratorTokenStream

    final String label = key.toString();
    String[] tokens = SPACE_PATTERN.split(value.toString());
    OpenObjectIntHashMap<String> wordList = new OpenObjectIntHashMap<String>(tokens.length * gramSize);
   
    if (gramSize > 1) {
      ShingleFilter sf = new ShingleFilter(new IteratorTokenStream(Iterators.forArray(tokens)), gramSize);
      do {
        String term = sf.getAttribute(TermAttribute.class).term();
        if (term.length() > 0) {
          if (wordList.containsKey(term)) {
            wordList.put(term, 1 + wordList.get(term));
View Full Code Here

Examples of org.apache.mahout.common.lucene.IteratorTokenStream

   * @throws IOException if there's a problem with the ShingleFilter reading data or the collector collecting output.
   */
  @Override
  protected void map(Text key, StringTuple value, final Context context) throws IOException, InterruptedException {

    ShingleFilter sf = new ShingleFilter(new IteratorTokenStream(value.getEntries().iterator()), maxShingleSize);
    int count = 0; // ngram count

    OpenObjectIntHashMap<String> ngrams =
            new OpenObjectIntHashMap<String>(value.getEntries().size() * (maxShingleSize - 1));
    OpenObjectIntHashMap<String> unigrams = new OpenObjectIntHashMap<String>(value.getEntries().size());
View Full Code Here

Examples of org.apache.mahout.common.lucene.IteratorTokenStream

    StringTuple value = it.next();

    Vector vector = new RandomAccessSparseVector(dimension, value.length()); // guess at initial size

    if (maxNGramSize >= 2) {
      ShingleFilter sf = new ShingleFilter(new IteratorTokenStream(value.getEntries().iterator()), maxNGramSize);

      do {
        String term = (sf.getAttribute(CharTermAttribute.class)).toString();
        if (term.length() > 0 && dictionary.containsKey(term)) { // ngram
          int termId = dictionary.get(term);
View Full Code Here

Examples of org.apache.mahout.common.lucene.IteratorTokenStream

   * @throws IOException if there's a problem with the ShingleFilter reading data or the collector collecting output.
   */
  @Override
  protected void map(Text key, StringTuple value, final Context context) throws IOException, InterruptedException {

    ShingleFilter sf = new ShingleFilter(new IteratorTokenStream(value.getEntries().iterator()), maxShingleSize);

    try {
      int count = 0; // ngram count

      OpenObjectIntHashMap<String> ngrams =
View Full Code Here

Examples of org.apache.mahout.common.lucene.IteratorTokenStream

    StringTuple value = it.next();

    Vector vector = new RandomAccessSparseVector(dimension, value.length()); // guess at initial size

    if (maxNGramSize >= 2) {
      ShingleFilter sf = new ShingleFilter(new IteratorTokenStream(value.getEntries().iterator()), maxNGramSize);
      try {
        do {
          String term = sf.getAttribute(CharTermAttribute.class).toString();
          if (!term.isEmpty() && dictionary.containsKey(term)) { // ngram
            int termId = dictionary.get(term);
View Full Code Here

Examples of org.apache.mahout.common.lucene.IteratorTokenStream

    StringTuple value = it.next();

    Vector vector = new RandomAccessSparseVector(dimension, value.length()); // guess at initial size

    if (maxNGramSize >= 2) {
      ShingleFilter sf = new ShingleFilter(new IteratorTokenStream(value.getEntries().iterator()), maxNGramSize);
      sf.reset();
      try {
        do {
          String term = sf.getAttribute(CharTermAttribute.class).toString();
          if (!term.isEmpty() && dictionary.containsKey(term)) { // ngram
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.