Package org.sindice.siren.analysis.filter

Source Code of org.sindice.siren.analysis.filter.TestMailtoFilter

/**
* Copyright 2014 National University of Ireland, Galway.
*
* This file is part of the SIREn project. Project and contact information:
*
*  https://github.com/rdelbru/SIREn
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*  http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.sindice.siren.analysis.filter;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.junit.Test;
import org.sindice.siren.analysis.TupleTokenizer;


/**
*
*/
public class TestMailtoFilter {

  private final String uritype = TupleTokenizer.getTokenTypes()[TupleTokenizer.URI];

  private final Tokenizer _t = new TupleTokenizer(new StringReader(""));

  /*
   * Helpers
   */

  private void assertURLDecodedTo(final Tokenizer t, final String uri, final String[] expectedStems)
  throws IOException {
    this.assertURLDecodedTo(t, uri, expectedStems, null);
  }

  private void assertURLDecodedTo(final Tokenizer t, final String uri, final String[] expectedStems, final String[] expectedTypes)
  throws IOException {
    this.assertURLDecodedTo(t, uri, expectedStems, expectedTypes, null);
  }

  private void assertURLDecodedTo(final Tokenizer t, final String uri, final String[] expectedStems, final String[] expectedTypes, final int[] expectedPosIncr)
  throws IOException {
    assertTrue("has CharTermAttribute", t.hasAttribute(CharTermAttribute.class));
    final CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);

    assertTrue("has TypeAttribute", t.hasAttribute(TypeAttribute.class));
    final TypeAttribute typeAtt = t.getAttribute(TypeAttribute.class);

    assertTrue("has PositionIncrementAttribute", t.hasAttribute(PositionIncrementAttribute.class));
    final PositionIncrementAttribute posIncrAtt = t.getAttribute(PositionIncrementAttribute.class);

    t.setReader(new StringReader(uri));
    t.reset();

    final TokenFilter filter = new MailtoFilter(t);
    for (int i = 0; i < expectedStems.length; i++) {
        assertTrue("token " + i + " exists", filter.incrementToken());
        assertEquals(expectedStems[i], termAtt.toString());
        if (expectedTypes == null)
          assertEquals(uritype, typeAtt.type());
        else
          assertEquals(expectedTypes[i], typeAtt.type());
        if (expectedPosIncr != null)
          assertEquals(expectedPosIncr[i], posIncrAtt.getPositionIncrement());
    }
    filter.end();
    filter.close();
  }

  @Test
  public void testNoMailto()
  throws Exception {
    this.assertURLDecodedTo(_t, "<http://stephane.net>", new String[] { "http://stephane.net" });
  }

  @Test
  public void testMailto()
  throws Exception {
    this.assertURLDecodedTo(_t, "<mailto:stephane.campinas@deri.org>",
      new String[] { "stephane.campinas@deri.org", "mailto:stephane.campinas@deri.org" });
  }

  @Test
  public void testBadMailto()
  throws Exception {
    this.assertURLDecodedTo(_t, "<mailto//stephane.net>", new String[] { "mailto//stephane.net" });
    this.assertURLDecodedTo(_t, "<mailTo:stephane.net>", new String[] { "mailTo:stephane.net" });
  }

  @Test
  public void testDifferentTypes()
  throws Exception {
    this.assertURLDecodedTo(_t, "<mailto:stephane.net> \"literal\" <mailto:stephane.campinas@deri.org>",
      new String[] { "stephane.net", "mailto:stephane.net", "literal",
                     "stephane.campinas@deri.org", "mailto:stephane.campinas@deri.org" },
      new String[] { uritype, uritype, TupleTokenizer.getTokenTypes()[TupleTokenizer.LITERAL], uritype, uritype},
      new int[] { 1, 0, 1, 1, 0 });
  }

  @Test
  public void testShortURI()
  throws Exception {
    this.assertURLDecodedTo(_t, "<steph>", new String[] { "steph" });
  }

}
TOP

Related Classes of org.sindice.siren.analysis.filter.TestMailtoFilter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.