Package com.flaptor.hounder.crawler.pagedb

Examples of com.flaptor.hounder.crawler.pagedb.Page


    @TestInfo(testType = TestInfo.TestType.UNIT)
    public void testDoesNothing() throws Exception {
        moduleConfig.set("categories.order","sites,forums");
        AProcessorModule module = new OverrideCategoryModule(moduleName,globalConfig);
        FetchDocument doc = new FetchDocument(new Page("",1f));
        doc.addCategory("keep");
        module.process(doc);
       
        assertTrue("categories where " + doc.getCategories() + ". expected keep.",doc.getCategories().size() == 1);
        assertTrue("categories where " + doc.getCategories() + ". expected keep.",doc.getCategories().contains("keep"));
View Full Code Here


    }
   
    @TestInfo(testType = TestInfo.TestType.UNIT)
    public void testTrueInternalProcess() throws Exception {
        MockTrueModule trueModule= new MockTrueModule(MOCK_TRUE);
        FetchDocument doc= new FetchDocument(new Page("",1f));
        assertFalse(doc.hasTag(MockTrueModule.TAG_MOCKTRUE_INTERNAL_PROCESS_WAS_CALLED));
        Set<String> setT= trueModule.getSetTag();
        Set<String> unsetT= trueModule.getUnsetTag();

        for (String t: setT){
View Full Code Here

    }

    @TestInfo(testType = TestInfo.TestType.UNIT)
    public void testFalseInternalProcess() throws Exception {
        MockFalseModule module= new MockFalseModule(MOCK_FALSE);
        FetchDocument doc= new FetchDocument(new Page("",1f));
        assertFalse(doc.hasTag(MockFalseModule.TAG_MOCKFALSE_INTERNAL_PROCESS_WAS_CALLED));
        Set<String> setT= module.getSetTag();
        Set<String> unsetT= module.getUnsetTag();
        for (String t: setT){
            doc.delTag(t);
View Full Code Here

        FileUtil.deleteFile(tmpConfig);
    }
   
    @TestInfo(testType = TestInfo.TestType.UNIT)
    public void testPassThroughOnTags() throws Exception {
        FetchDocument doc= new FetchDocument(new Page("",1f));
       
        MockPassOnTagModule mm1= new MockPassOnTagModule(MOCK_PASS);       
        Set<String> passT= mm1.getPassThroughOnTags();
        assertTrue(passT.size() > 0); // else we are checking nothing
        for (String t: passT){
            doc.addTag(t);
        }       
        mm1.process(doc);
       
        doc= new FetchDocument(new Page("",1f));
        MockPassOnTagMissingModule mm2= new MockPassOnTagMissingModule(MOCK_MISS);       
        // Make sure, this tag is not defined. Else will not passThrough, and
        // the test will fail:
        passT= mm2.getPassThroughOnMissingTags();
        assertTrue(passT.size() > 0); // else we are checking nothing
        for (String t: passT){
            doc.delTag(t);
        }
        mm2.process(doc);
               

        MockModule mm3= new MockModule(MOCK_DO);
        doc= new FetchDocument(new Page("",1f));
        // Make sure, this tag is not defined. Else the test will be false
        doc.delTag(MockModule.TAG_MOCK3_INTERNAL_PROCESS_WAS_CALLED);
        assertFalse(doc.hasTag(MockModule.TAG_MOCK3_INTERNAL_PROCESS_WAS_CALLED));
        mm3.process(doc);
        assertTrue(doc.hasTag(MockModule.TAG_MOCK3_INTERNAL_PROCESS_WAS_CALLED));
View Full Code Here

   
    @TestInfo(testType = TestInfo.TestType.UNIT)
    public void testSpanish() throws Exception {
        FetchDocument doc;
        String lang= LanguageDetectionModule.LANGUAGE_TAG+"es";
        doc= new FetchDocument(new Page("",1f), null, SP_TEXT1, null, null, null, null,
                true, false, false, true);
        assertEquals(0,doc.getTags().size());
        ldm.process(doc);
        assertEquals(1,doc.getTags().size());
        assertTrue(doc.getTags().contains(lang));
       
        doc= new FetchDocument(new Page("",1f), null, SP_TEXT2, null, null, null, null,
                true, false, false, true);
        assertEquals(0,doc.getTags().size());
        ldm.process(doc);
        assertEquals(1,doc.getTags().size());
        assertTrue(doc.getTags().contains(lang));

        doc= new FetchDocument(new Page("",1f), null, SP_TEXT3, null, null, null, null,
                true, false, false, true);
        assertEquals(0,doc.getTags().size());
        ldm.process(doc);
        assertEquals(1,doc.getTags().size());
        assertTrue(doc.getTags().contains(lang));
View Full Code Here

   
    @TestInfo(testType = TestInfo.TestType.UNIT)
    public void testEnglish() throws Exception {
        FetchDocument doc;
        String lang= LanguageDetectionModule.LANGUAGE_TAG+"en";
        doc= new FetchDocument(new Page("",1f), null, EN_TEXT1, null, null, null, null,
                true, false, false, true);
        assertEquals(0,doc.getTags().size());
        ldm.process(doc);
        assertEquals(1,doc.getTags().size());
        assertTrue(doc.getTags().contains(lang));
       
        doc= new FetchDocument(new Page("",1f), null, EN_TEXT2, null, null, null, null,
                true, false, false, true);
        assertEquals(0,doc.getTags().size());
        ldm.process(doc);
        assertEquals(1,doc.getTags().size());
        assertTrue(doc.getTags().contains(lang));

        doc= new FetchDocument(new Page("",1f), null, EN_TEXT3, null, null, null, null,
                true, false, false, true);
        assertEquals(0,doc.getTags().size());
        ldm.process(doc);
        assertEquals(1,doc.getTags().size());
        assertTrue(doc.getTags().contains(lang));
View Full Code Here

        Link[] allAr={AR_1, AR_2, AR_3, AR_4};
        Link[] allOt={OT_1, OT_2, OT_3, OT_4};
        Link[] allGl={GL_1, GL_2, GL_3, GL_4};
        Link[] allOg={OG_1, OG_2, OG_3, OG_4};

        doc= new FetchDocument(new Page("",1f), null, null, null, allAr, null, null, true, false, false, true);
        res= module.tInternalProcess(doc);
        assertEquals(res, Double.MAX_VALUE);

        doc= new FetchDocument(new Page("",1f), null, null, null, allOt, null, null, true, false, false, true);
        res= module.tInternalProcess(doc);
        assertEquals(res, 0.0);

        doc= new FetchDocument(new Page("",1f), null, null, null, allGl, null, null, true, false, false, true);
        res= module.tInternalProcess(doc);
        assertEquals(res, 0.0);

        doc= new FetchDocument(new Page("",1f), null, null, null, allOg, null, null, true, false, false, true);
        res= module.tInternalProcess(doc);
        assertEquals(res, 0.0);       
    }
View Full Code Here

        Link[] a075={AR_1, OT_2, AR_3, AR_4};
        Link[] a050={AR_1, AR_2, OT_3, OT_4};
        Link[] a025={OT_2, AR_2, OT_3, OT_4};
        Link[] a000={OT_1, OT_2, OT_3, OT_4};

        doc= new FetchDocument(new Page("",1f), null, null, null, a075, null, null, true, false, false, true);
        res= module.tInternalProcess(doc);
        assertEquals(res, 3/1.0);

        doc= new FetchDocument(new Page("",1f), null, null, null, a050, null, null, true, false, false, true);
        res= module.tInternalProcess(doc);
        assertEquals(res, 2/2.0);

        doc= new FetchDocument(new Page("",1f), null, null, null, a025, null, null, true, false, false, true);
        res= module.tInternalProcess(doc);
        assertEquals(res, 1/3.0);

        doc= new FetchDocument(new Page("",1f), null, null, null, a000, null, null, true, false, false, true);
        res= module.tInternalProcess(doc);
        assertEquals(res, 0.0);       
    }
View Full Code Here

        Link[] a075={AR_1, OT_2, AR_3, AR_4, GL_1};
        Link[] a050={AR_1, OG_3, OG_4, AR_2, OT_3, OT_4, GL_2, GL_1};
        Link[] a025={OT_2, GL_3, AR_2, OT_3, OT_4, GL_1};
        Link[] a000={GL_2, OT_1, OT_2, OT_3, OT_4, GL_4, OG_1, OG_2};

        doc= new FetchDocument(new Page("",1f), null, null, null, a075, null, null, true, false, false, true);
        res= module.tInternalProcess(doc);
        assertEquals(res, 3/1.0);

        doc= new FetchDocument(new Page("",1f), null, null, null, a050, null, null, true, false, false, true);
        res= module.tInternalProcess(doc);
        assertEquals(res, 2/2.0);

        doc= new FetchDocument(new Page("",1f), null, null, null, a025, null, null, true, false, false, true);
        res= module.tInternalProcess(doc);
        assertEquals(res, 1/3.0);

        doc= new FetchDocument(new Page("",1f), null, null, null, a000, null, null, true, false, false, true);
        res= module.tInternalProcess(doc);
        assertEquals(res, 0.0);       
    }
View Full Code Here

        Link[] a075={AR_1, OT_2, KA_1, AR_4, GL_1};
        Link[] a050={KA_1, KA_2, OT_3, OT_4, GL_2, GL_1};
        Link[] a025={OT_2, GL_3, KA_2, OT_3, OT_4, GL_1};
        Link[] a000={GL_2, OT_1, OT_2, OT_3, OT_4, GL_4};

        doc= new FetchDocument(new Page("",1f), null, null, null, a075, null, null, true, false, false, true);
        res= module.tInternalProcess(doc);
        assertEquals(3/1.0, res);

        doc= new FetchDocument(new Page("",1f), null, null, null, a050, null, null, true, false, false, true);
        res= module.tInternalProcess(doc);
        assertEquals(2/2.0, res);

        doc= new FetchDocument(new Page("",1f), null, null, null, a025, null, null, true, false, false, true);
        res= module.tInternalProcess(doc);
        assertEquals(1/3.0, res);

        doc= new FetchDocument(new Page("",1f), null, null, null, a000, null, null, true, false, false, true);
        res= module.tInternalProcess(doc);
        assertEquals(0.0, res);       
    }
View Full Code Here

TOP

Related Classes of com.flaptor.hounder.crawler.pagedb.Page

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.