Package edu.buffalo.cse.ir.wikiindexer.tokenizer

Examples of edu.buffalo.cse.ir.wikiindexer.tokenizer.TokenStream


  /**
   * Test method for {@link edu.buffalo.cse.ir.wikiindexer.tokenizer.TokenStream#hasPrevious()}.
   */
  @Test
  public void testHasPrevious() {
    //null
    TokenStream stream = new TokenStream((String)null);
    assertEquals(false, stream.hasPrevious());
    stream = null;
   
    //empty
    stream = new TokenStream("");
    assertEquals(false, stream.hasPrevious());
    stream = null;
   
    //some text and iteration
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    assertFalse(stream.hasPrevious()); //start of stream
    stream.seekEnd();
    assertTrue(stream.hasPrevious());
    stream.previous(); //after this
    assertTrue(stream.hasPrevious());
    stream.previous(); //after is
    assertTrue(stream.hasPrevious());
    stream.previous(); //after a
    assertTrue(stream.hasPrevious());
    stream.previous(); //after test
    assertTrue(stream.hasPrevious());
    stream.previous(); //after stream
    assertFalse(stream.hasPrevious());
    stream = null;
   
    //with seek
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    stream.reset();
    assertFalse(stream.hasPrevious());
    stream = null;
   
    //forward and reverse
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    stream.next();
    assertTrue(stream.hasPrevious());
    stream.previous();
    assertFalse(stream.hasPrevious());
    stream = null;
   
    //with remove
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    stream.remove();
    assertFalse(stream.hasPrevious());
    stream = null;
   
    //with merge with previous
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    stream.next();
    stream.mergeWithPrevious();
    assertFalse(stream.hasPrevious());
    stream = null;
   
    //with merge with next
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    stream.mergeWithNext();
    assertFalse(stream.hasPrevious());
    stream = null;
  }
View Full Code Here


  /**
   * Test method for {@link edu.buffalo.cse.ir.wikiindexer.tokenizer.TokenStream#next()}.
   */
  @Test
  public void testNext() {
    //null
    TokenStream stream = new TokenStream((String) null);
    assertNull(stream.next());
    stream = null;
   
    //empty str
    stream = new TokenStream("");
    assertNull(stream.next());
    stream = null;
   
    //fwd iteration
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    assertEquals("this", stream.next());
    assertEquals("is", stream.next());
    assertEquals("a", stream.next());
    assertEquals("test", stream.next());
    assertEquals("stream", stream.next());
    assertNull(stream.next());
    stream = null;
   
    //fwd and reverse
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    assertEquals("this", stream.next());
    stream.previous();
    assertEquals("this", stream.next());
    assertEquals("is", stream.next());
    assertEquals("a", stream.next());
    stream.reset();
    assertEquals("this", stream.next());
    stream = null;
   
    //with remove
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    stream.remove();
    assertEquals("is", stream.next());
    stream.remove();
    assertEquals("test", stream.next());
    stream = null;
   
    //with merge with previous
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    stream.next();
    stream.mergeWithPrevious();
    assertEquals("this is", stream.next());
    stream = null;
   
    //with merge with next
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    stream.mergeWithNext();
    assertEquals("this is", stream.next());
    stream = null;
  }
View Full Code Here

    // Excerpt: creates an IndexableDocument for doc1 and tokenizes the document's author,
    // categories, links and section text, each with the tokenizer looked up in tknizerMap1
    // for the matching INDEXFIELD.
    IndexableDocument idoc = new IndexableDocument();
   
    // Carry the source document's id over to the indexable document.
    idoc.docId = doc1.getId();
   
   
    // AUTHOR field: wrap the author value in a stream and tokenize it.
    // NOTE(review): presumably tknizerMap1 is a Map, so get(...) could return null for an
    // unregistered field and the tokenize call would then fail - confirm against the caller.
    TokenStream author = new TokenStream(doc1.getAuthor());
    Tokenizer t_author = tknizerMap1.get(INDEXFIELD.AUTHOR);
    t_author.tokenize(author);
    //System.out.println("=======" +author.getAllTokens());
   
   
    // CATEGORY field: the stream is seeded with the toString() form of getCategories().
    TokenStream categories = new TokenStream(doc1.getCategories().toString());
    Tokenizer t_categories = tknizerMap1.get(INDEXFIELD.CATEGORY);
    t_categories.tokenize(categories);
    //System.out.println("=======" +categories.getAllTokens());
   
    // LINK field: the stream is seeded with the toString() form of getLinks().
    TokenStream links = new TokenStream(doc1.getLinks().toString());
    Tokenizer t_links = tknizerMap1.get(INDEXFIELD.LINK);
    t_links.tokenize(links);
    //System.out.println("=======" +links.getAllTokens());
   
    // TERM field: seed the stream with the first section's text, then append the text of
    // every remaining section before tokenizing.
    // NOTE(review): get(0) assumes doc1 has at least one section - verify.
    TokenStream term = new TokenStream(doc1.getSections().get(0).getText());
    for(int i=1;i<doc1.getSections().size();i++)
    {
      term.append(doc1.getSections().get(i).getText());
    }
    Tokenizer t_term = tknizerMap1.get(INDEXFIELD.TERM);
    t_term.tokenize(term);
    //System.out.println("=======" +term.getAllTokens());
   
View Full Code Here

  /**
   * Test method for {@link edu.buffalo.cse.ir.wikiindexer.tokenizer.TokenStream#previous()}.
   */
  @Test
  public void testPrevious() {
    //null
    TokenStream stream = new TokenStream((String) null);
    assertNull(stream.previous());
    stream = null;
   
    //empty str
    stream = new TokenStream("");
    assertNull(stream.previous());
    stream = null;
   
    //reverse iteration
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    stream.seekEnd();
    assertEquals("stream", stream.previous());
    assertEquals("test", stream.previous());
    assertEquals("a", stream.previous());
    assertEquals("is", stream.previous());
    assertEquals("this", stream.previous());
    assertNull(stream.previous());
    stream = null;
   
    //fwd and reverse
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    stream.seekEnd();
    assertEquals("stream", stream.previous());
    stream.next();
    assertEquals("stream", stream.previous());
    assertEquals("test", stream.previous());
    assertEquals("a", stream.previous());
    stream.reset();
    assertEquals("this", stream.next());
    stream = null;
   
    //with remove
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    stream.remove();
    stream.next();
    assertEquals("is", stream.previous());
    stream.next();
    stream.remove();
    assertEquals("is", stream.previous());
    stream = null;
   
    //with merge with previous
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    stream.next();
    stream.mergeWithPrevious();
    assertNull(stream.previous());
    stream.next();
    assertEquals("this is", stream.previous());
    stream = null;
   
    //with merge with next
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    stream.mergeWithNext();
    assertNull(stream.previous());
    stream.next();
    assertEquals("this is", stream.previous());
    stream = null;
  }
View Full Code Here

  /**
   * Test method for {@link edu.buffalo.cse.ir.wikiindexer.tokenizer.TokenStream#remove()}.
   */
  @Test
  public void testRemove() {
    //remove on null
    TokenStream stream = new TokenStream((String) null);
    stream.remove();
    assertNull(stream.getAllTokens());
    stream = null;
   
    //remove on empty
    stream = new TokenStream("");
    stream.remove();
    assertNull(stream.getAllTokens());
    stream = null;
   
    //remove till empty
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
   
    int currcnt = 5;
    while (stream.hasNext()) {
      assertEquals(currcnt--, stream.getAllTokens().size());
      stream.remove();
    }
    stream = null;
   
    //remove from invalid position
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    stream.seekEnd();
    stream.remove();
    assertEquals(5, stream.getAllTokens().size());
    stream = null;
  }
View Full Code Here

  /**
   * Test method for {@link edu.buffalo.cse.ir.wikiindexer.tokenizer.TokenStream#mergeWithPrevious()}.
   */
  @Test
  public void testMergeWithPrevious() {
    //everything is null, empty
    TokenStream stream = new TokenStream((String) null);
    assertFalse(stream.mergeWithPrevious());
    stream = null;
   
    stream = new TokenStream("");
    assertFalse(stream.mergeWithPrevious());
    stream = null;
   
    //previous is null
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    assertFalse(stream.mergeWithPrevious());
   
    //proper merge
    stream.seekEnd();
    stream.previous();
    assertTrue(stream.mergeWithPrevious());
    assertEquals("test stream", stream.next());
    assertEquals(4, stream.getAllTokens().size());
    stream = null;
   
    //full merge - reverse
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    stream.seekEnd();
    stream.previous();
    assertTrue(stream.mergeWithPrevious());
    assertEquals("test stream", stream.next());
    stream.previous();
    assertTrue(stream.mergeWithPrevious());
    assertEquals("a test stream", stream.next());
    stream.previous();
    assertTrue(stream.mergeWithPrevious());
    assertEquals("is a test stream", stream.next());
    stream.previous();
    assertTrue(stream.mergeWithPrevious());
    assertEquals("this is a test stream", stream.next());
    stream.previous();
    assertFalse(stream.mergeWithPrevious());
    stream = null;
   
    //full merge - forward
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    stream.next();
    assertTrue(stream.mergeWithPrevious());
    assertEquals("this is", stream.next());
    assertTrue(stream.mergeWithPrevious());
    assertEquals("this is a", stream.next());
    assertTrue(stream.mergeWithPrevious());
    assertEquals("this is a test", stream.next());
    assertTrue(stream.mergeWithPrevious());
    assertEquals("this is a test stream", stream.next());
    assertFalse(stream.mergeWithPrevious());
    stream = null;
  }
View Full Code Here

  /**
   * Test method for {@link edu.buffalo.cse.ir.wikiindexer.tokenizer.TokenStream#mergeWithNext()}.
   */
  @Test
  public void testMergeWithNext() {
    //everything is null, empty
    TokenStream stream = new TokenStream((String) null);
    assertFalse(stream.mergeWithNext());
    stream = null;
   
    stream = new TokenStream("");
    assertFalse(stream.mergeWithNext());
    stream = null;
   
    //next is null
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    stream.seekEnd();
    assertFalse(stream.mergeWithNext());
   
    //proper merge
    stream.reset();
    assertTrue(stream.mergeWithNext());
    assertEquals("this is", stream.next());
    assertEquals(4, stream.getAllTokens().size());
    stream = null;
   
    //full merge - reverse
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    stream.seekEnd();
    stream.previous();
    stream.previous();
    assertTrue(stream.mergeWithNext());
    assertEquals("test stream", stream.next());
    stream.previous();
    stream.previous();
    assertTrue(stream.mergeWithNext());
    assertEquals("a test stream", stream.next());
    stream.previous();
    stream.previous();
    assertTrue(stream.mergeWithNext());
    assertEquals("is a test stream", stream.next());
    stream.previous();
    stream.previous();
    assertTrue(stream.mergeWithNext());
    assertEquals("this is a test stream", stream.next());
    stream.previous();
    assertFalse(stream.mergeWithNext());
    stream = null;
   
    //full merge - forward
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    assertTrue(stream.mergeWithNext());
    assertEquals("this is", stream.next());
    stream.previous();
    assertTrue(stream.mergeWithNext());
    assertEquals("this is a", stream.next());
    stream.previous();
    assertTrue(stream.mergeWithNext());
    assertEquals("this is a test", stream.next());
    stream.previous();
    assertTrue(stream.mergeWithNext());
    assertEquals("this is a test stream", stream.next());
    assertFalse(stream.mergeWithNext());
    stream = null;
  }
View Full Code Here

   */
  public void addField(INDEXFIELD field, TokenStream stream) {
    //TODO: Implement this method
    if (finalIndexableMap.containsKey(field))
    {
      TokenStream stream1 = finalIndexableMap.get(field);
      stream1.merge(stream);
      finalIndexableMap.put(field,stream1);
    }
   
    else
    {
View Full Code Here

  /**
   * Test method for {@link edu.buffalo.cse.ir.wikiindexer.tokenizer.TokenStream#set(java.lang.String[])}.
   */
  @Test
  public void testSet() {
    //set on null and empty streams
    TokenStream stream = new TokenStream((String)null);
    stream.set("invalid");
    assertNull(stream.getAllTokens());
    stream = null;
   
    stream = new TokenStream("");
    stream.set("invalid");
    assertNull(stream.getAllTokens());
    stream = null;
   
    //valid posiiton, null or empty tokens
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    stream.set((String)null);
    assertEquals("this", stream.next());
    stream.previous();
    stream.set("");
    assertEquals("this", stream.next());
    stream = null;
   
    //valid new token, invalid position
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    stream.seekEnd();
    stream.set("valid");
    assertEquals("stream", stream.previous());
    stream = null;
   
    //correct set, single token
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    stream.set("that");
    assertEquals(5, stream.getAllTokens().size());
    assertEquals("that", stream.next());
    stream = null;
   
    //correct set, multiple tokens at the end
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    stream.seekEnd();
    stream.previous();
    stream.set("of", "the", "set", "method");
    assertEquals(8, stream.getAllTokens().size());
    assertEquals("method", stream.next());
    stream = null;
   
    //correct set, multiple tokens at the start
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    stream.set("you","think","styx");
    assertEquals(7, stream.getAllTokens().size());
    assertEquals("styx", stream.next());
    stream = null;
   
    //correct set, multiple tokens in the middle
    stream = new TokenStream("this");
    stream.append("is","a","test","stream");
    stream.seekEnd();
    stream.previous();
    stream.previous();
    stream.set("really", "interesting");
    assertEquals(6, stream.getAllTokens().size());
    assertEquals("interesting", stream.next());
    assertEquals("stream", stream.next());
    assertFalse(stream.hasNext());
    stream = null;
  }
View Full Code Here

TOP

Related Classes of edu.buffalo.cse.ir.wikiindexer.tokenizer.TokenStream

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.