Package edu.buffalo.cse.ir.wikiindexer.tokenizer

Examples of edu.buffalo.cse.ir.wikiindexer.tokenizer.Tokenizer


   
    // Excerpt from a larger method: builds one TokenStream per index field from
    // doc1, runs the tokenizer looked up for that field over it, and attaches
    // the resulting streams to idoc.

    // Carry the source document's id over to the indexable document.
    idoc.docId = doc1.getId();
   
   
    // AUTHOR: wrap the author value in a stream and tokenize it in place.
    // NOTE(review): the result of tknizerMap1.get(...) is used without a null
    // check here and below — presumably every field has an entry; confirm.
    TokenStream author = new TokenStream(doc1.getAuthor());
    Tokenizer t_author = tknizerMap1.get(INDEXFIELD.AUTHOR);
    t_author.tokenize(author);
    //System.out.println("=======" +author.getAllTokens());
   
   
    // CATEGORY: the stream is seeded with toString() of the whole categories value.
    // NOTE(review): if getCategories() returns a java.util collection, toString()
    // yields the bracketed "[a, b]" form — confirm the tokenizer expects that.
    TokenStream categories = new TokenStream(doc1.getCategories().toString());
    Tokenizer t_categories = tknizerMap1.get(INDEXFIELD.CATEGORY);
    t_categories.tokenize(categories);
    //System.out.println("=======" +categories.getAllTokens());
   
    // LINK: same approach as categories — seeded with toString() of the links value.
    TokenStream links = new TokenStream(doc1.getLinks().toString());
    Tokenizer t_links = tknizerMap1.get(INDEXFIELD.LINK);
    t_links.tokenize(links);
    //System.out.println("=======" +links.getAllTokens());
   
    // TERM: start the stream from the first section's text, then append the
    // text of every remaining section before tokenizing the combined stream.
    // NOTE(review): get(0) is called unconditionally — presumably this fails
    // when the document has no sections; confirm callers guarantee at least one.
    TokenStream term = new TokenStream(doc1.getSections().get(0).getText());
    for(int i=1;i<doc1.getSections().size();i++)
    {
      term.append(doc1.getSections().get(i).getText());
    }
    Tokenizer t_term = tknizerMap1.get(INDEXFIELD.TERM);
    t_term.tokenize(term);
    //System.out.println("=======" +term.getAllTokens());
   
   
      // Attach the tokenized streams to idoc under their fields (the excerpt
      // is cut off after the first two fields).
      idoc.addField(INDEXFIELD.AUTHOR, author);
      idoc.addField(INDEXFIELD.CATEGORY, categories);
View Full Code Here

TOP

Related Classes of edu.buffalo.cse.ir.wikiindexer.tokenizer.Tokenizer

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle, Inc. Contact coftware#gmail.com.