Package pignlproc.evaluation

Source Code of pignlproc.evaluation.TestEvalFunctions

package pignlproc.evaluation;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import opennlp.tools.util.Span;

import org.apache.commons.lang.StringUtils;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.DefaultDataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.junit.Before;
import org.junit.Test;

public class TestEvalFunctions {

    public static final String JOHN_SENTENCE = "John Smith works at Smith Consulting.";

    public static final String JOHN_SENTENCE_WITH_TABS = "John\tSmith\tworks\nat Smith Consulting.";

    protected MergeAsOpenNLPAnnotatedText merger;

    protected AggregateTextBag aggregator;

    protected ConcatTextBag concatTextBag;

    @Before
    public void setUp() throws IOException {
        merger = new MergeAsOpenNLPAnnotatedText();
        aggregator = new AggregateTextBag(40, true);
        concatTextBag = new ConcatTextBag(" _ ", true);
    }

    @Test
    public void testSimpleSentenceMerge() throws ExecException {
        String sentence = JOHN_SENTENCE;
        List<Span> names = Arrays.asList(new Span(0, 10, "person"), new Span(
                19, 36, "organization"));
        String merged = merger.merge(sentence, names);
        assertEquals("<START:person> John Smith <END> works"
                + " at <START:organization> Smith Consulting <END> .", merged);

        names = Arrays.asList(new Span(0, 10), new Span(19, 36));
        merged = merger.merge(sentence, names);
        assertEquals("<START> John Smith <END> works"
                + " at <START> Smith Consulting <END> .", merged);
    }

    @SuppressWarnings("unchecked")
    @Test
    public void testBagOfAnnotationMerge() throws IOException {
        TupleFactory tf = TupleFactory.getInstance();
        DefaultDataBag textBag = new DefaultDataBag();
        DefaultDataBag beginBag = new DefaultDataBag();
        DefaultDataBag endBag = new DefaultDataBag();
        DefaultDataBag typeBag = new DefaultDataBag();

        textBag.add(tf.newTupleNoCopy(Arrays.asList(JOHN_SENTENCE)));
        beginBag.add(tf.newTupleNoCopy(Arrays.asList(0)));
        endBag.add(tf.newTupleNoCopy(Arrays.asList(10)));
        typeBag.add(tf.newTupleNoCopy(Arrays.asList("person")));

        textBag.add(tf.newTupleNoCopy(Arrays.asList(JOHN_SENTENCE)));
        beginBag.add(tf.newTupleNoCopy(Arrays.asList(19)));
        endBag.add(tf.newTupleNoCopy(Arrays.asList(36)));
        typeBag.add(tf.newTupleNoCopy(Arrays.asList("organization")));

        // all bags
        Tuple input = tf.newTupleNoCopy(Arrays.asList(textBag, beginBag,
                endBag, typeBag));
        String merged = merger.exec(input);
        assertEquals("<START:person> John Smith <END> works"
                + " at <START:organization> Smith Consulting <END> .", merged);

        // all literals
        input = tf.newTupleNoCopy(Arrays.asList(JOHN_SENTENCE, 0, 10, "person"));
        merged = merger.exec(input);
        assertEquals(
                "<START:person> John Smith <END> works at Smith Consulting .",
                merged);

        // bags without types
        input = tf.newTupleNoCopy(Arrays.asList(textBag, beginBag, endBag));
        merged = merger.exec(input);
        assertEquals(
                "<START> John Smith <END> works at <START> Smith Consulting <END> .",
                merged);

        // bags with fixed type
        input = tf.newTupleNoCopy(Arrays.asList(textBag, beginBag, endBag,
                "entity"));
        merged = merger.exec(input);
        assertEquals("<START:entity> John Smith <END> works at"
                + " <START:entity> Smith Consulting <END> .", merged);
    }

    @Test
    public void testAggregateBagOfText() throws IOException {
        TupleFactory tf = TupleFactory.getInstance();
        DefaultDataBag textBag = new DefaultDataBag();

        textBag.add(tf.newTupleNoCopy(Arrays.asList(JOHN_SENTENCE)));
        textBag.add(tf.newTupleNoCopy(Arrays.asList(JOHN_SENTENCE)));
        textBag.add(tf.newTupleNoCopy(Arrays.asList(JOHN_SENTENCE)));
        textBag.add(tf.newTupleNoCopy(Arrays.asList(JOHN_SENTENCE)));
        textBag.add(tf.newTupleNoCopy(Arrays.asList(JOHN_SENTENCE)));
        textBag.add(tf.newTupleNoCopy(Arrays.asList(JOHN_SENTENCE)));

        // all bags
        Tuple input = tf.newTupleNoCopy(Arrays.asList(textBag));
        String merged = aggregator.exec(input);
        String expected = StringUtils.join(
                Arrays.asList(JOHN_SENTENCE, JOHN_SENTENCE), " ");
        expected = "\"" + expected + "\"";
        assertEquals(expected, merged);
    }

    @Test
    public void testAggregateBagOfTextWithTabs() throws IOException {
        TupleFactory tf = TupleFactory.getInstance();
        DefaultDataBag textBag = new DefaultDataBag();

        textBag.add(tf.newTupleNoCopy(Arrays.asList(JOHN_SENTENCE_WITH_TABS)));
        textBag.add(tf.newTupleNoCopy(Arrays.asList(JOHN_SENTENCE_WITH_TABS)));
        textBag.add(tf.newTupleNoCopy(Arrays.asList(JOHN_SENTENCE_WITH_TABS)));
        textBag.add(tf.newTupleNoCopy(Arrays.asList(JOHN_SENTENCE_WITH_TABS)));
        textBag.add(tf.newTupleNoCopy(Arrays.asList(JOHN_SENTENCE_WITH_TABS)));
        textBag.add(tf.newTupleNoCopy(Arrays.asList(JOHN_SENTENCE_WITH_TABS)));

        // all bags
        Tuple input = tf.newTupleNoCopy(Arrays.asList(textBag));
        String merged = aggregator.exec(input);
        String expected = StringUtils.join(
                Arrays.asList(JOHN_SENTENCE, JOHN_SENTENCE), " ");
        expected = "\"" + expected + "\"";
        assertEquals(expected, merged);
    }

    @Test
    public void testConcatTextBag() throws IOException {
        TupleFactory tf = TupleFactory.getInstance();
        DefaultDataBag textBag = new DefaultDataBag();

        textBag.add(tf.newTupleNoCopy(Arrays.asList(" foo1")));
        textBag.add(tf.newTupleNoCopy(Arrays.asList("foo2")));
        textBag.add(tf.newTupleNoCopy(Arrays.asList(" foo3 ")));
        textBag.add(tf.newTupleNoCopy(Arrays.asList("foo1")));
        textBag.add(tf.newTupleNoCopy(Arrays.asList("foo4 ")));

        // all bags
        Tuple input = tf.newTupleNoCopy(Arrays.asList(textBag));
        String merged = concatTextBag.exec(input);
        String expected = "foo1 _ foo2 _ foo3 _ foo4";
        assertEquals(expected, merged);
    }

    @Test
    public void testNoLoopInPath() throws IOException {
        NoLoopInPath noloopInPathFunc = new NoLoopInPath(" ");
        TupleFactory tf = TupleFactory.getInstance();

        assertTrue(noloopInPathFunc.exec(null));
        assertTrue(noloopInPathFunc.exec(tf.newTupleNoCopy(Collections.emptyList())));
        assertTrue(noloopInPathFunc.exec(tf.newTupleNoCopy(Arrays.asList(""))));
        assertTrue(noloopInPathFunc.exec(tf.newTupleNoCopy(Arrays.asList("a"))));
        assertTrue(noloopInPathFunc.exec(tf.newTupleNoCopy(Arrays.asList("a b c d"))));

        assertFalse(noloopInPathFunc.exec(tf.newTupleNoCopy(Arrays.asList("a a"))));
        assertFalse(noloopInPathFunc.exec(tf.newTupleNoCopy(Arrays.asList("a b c a"))));
        assertFalse(noloopInPathFunc.exec(tf.newTupleNoCopy(Arrays.asList("a b c b"))));
        assertFalse(noloopInPathFunc.exec(tf.newTupleNoCopy(Arrays.asList("a b c c"))));

        // for efficiency reason, this function only check for loops introduced
        // by the last element:
        assertTrue(noloopInPathFunc.exec(tf.newTupleNoCopy(Arrays.asList("a b b d"))));
    }

    @Test
    public void testCheckAbstract() throws IOException {
        CheckAbstract abstractChecker = new CheckAbstract();
        TupleFactory tf = TupleFactory.getInstance();
        assertFalse(abstractChecker.exec(null));
        assertFalse(abstractChecker.exec(tf.newTupleNoCopy(Collections.emptyList())));
        assertFalse(abstractChecker.exec(tf.newTupleNoCopy(Arrays.asList((String) null))));
        assertFalse(abstractChecker.exec(tf.newTupleNoCopy(Arrays.asList(""))));
        assertFalse(abstractChecker.exec(tf.newTupleNoCopy(Arrays.asList("Some short boring abstract."))));
        String longBoringAbstract = "Elections by country gives information on elections."
                + " For each de jure and de facto sovereign state and dependent territory"
                + " an article on elections in that entity has been included and "
                + "information on the way the head of state and the parliament or "
                + "legislature is elected. The articles include the results of the "
                + "elections. For a chronological order, see the electoral calendar."
                + " Contents Top · 0–9 · A B C D E F G H I J K L M N O P Q R S T U V W X Y Z";
        assertFalse(abstractChecker.exec(tf.newTupleNoCopy(Arrays.asList(longBoringAbstract))));
        String interestingAbstract = "Mathematics is the study of quantity, structure,"
                + " space, and change. Mathematicians seek out patterns,"
                + " formulate new conjectures, and establish truth by rigorous"
                + " deduction from appropriately chosen axioms and definitions."
                + " There is debate over whether mathematical objects such as"
                + " numbers and points exist naturally or are human creations."
                + " The mathematician Benjamin Peirce called mathematics \"the"
                + " science that draws necessary conclusions\". Albert Einstein,"
                + " on the other hand, stated that \"as far as the laws of"
                + " mathematics refer to reality, they are not certain; and as"
                + " far as they are certain, they do not refer to reality. \""
                + " Through the use of abstraction and logical reasoning,"
                + " mathematics evolved from counting, calculation, measurement,"
                + " and the systematic study of the shapes and motions of"
                + " physical objects.";
        assertTrue(abstractChecker.exec(tf.newTupleNoCopy(Arrays.asList(interestingAbstract))));
    }
}
TOP

Related Classes of pignlproc.evaluation.TestEvalFunctions

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.