Package com.flaptor.hounder.crawler.pagedb

Examples of com.flaptor.hounder.crawler.pagedb.Page


        assertTrue(cbv.hasValue(doc));
    }

    @TestInfo(testType = TestInfo.TestType.UNIT)
    public void testGetValue() throws Exception {
        FetchDocument doc = new FetchDocument(new Page("",1f));
        assertTrue(new Double(1000).equals(cbv.getValue(doc)));
    }
View Full Code Here


        ArrayList<NodeAddress> nodes = new ArrayList<NodeAddress>();
        nodes.add(node);
        PageCatcher localCatcher = new PageCatcher(tmpDir+"/catchdb");
        localCatcher.start(node);
        PageDistributor distributor = new PageDistributor(nodes, node, mapper);
        Page page1 = PageTest.randomPage();
        page1.setUrl("http://example.com/test0=0");
        IRemotePageCatcher stubCatcher= distributor.getCatcher(page1);
        stubCatcher.addPage(page1);
        PageDB db = localCatcher.getCatch();
        db.open(PageDB.READ);
        Iterator<Page> pages = db.iterator();
        assertTrue("The page sent through rmi did not survive the adventure.", pages.hasNext());
        Page page2 = pages.next();
        assertTrue("The page has been changed by the trip through rmi:\n  1: "+page1+"\n  2: "+page2, page1.equals(page2));
        assertFalse("Sent one page through rmi and more than one came out the other end.", pages.hasNext());
        db.close();
    }
View Full Code Here

            Execute.sleep(1000);
            config.set("pagedb.node.list", "127.0.0.1:1091, 127.0.0.1:1090");
            new DBCreator(1,catchers[1]).start();
            while (done < 2) Execute.sleep(100);

            Page page00 = PageTest.randomPage();
            Page page01 = PageTest.randomPage();
            Page page10 = PageTest.randomPage();
            Page page11 = PageTest.randomPage();

            page00.setUrl("http://example-"+tst+".com/test0=0");
            page01.setUrl("http://example-"+tst+".com/test0=1");
            page10.setUrl("http://example-"+tst+".com/test1=0");
            page11.setUrl("http://example-"+tst+".com/test1=1");

            dbs[0].addPage(page00);
            dbs[0].addPage(page01);
            dbs[1].addPage(page10);
            dbs[1].addPage(page11);

            Execute.sleep(1000);

            done = 0;
            new DBCloser(0).start();
            new DBCloser(1).start();
            while (done < 2) Execute.sleep(100);

            PageDB db = new PageDB(dbNames[0]);
            db.open(DPageDB.READ);
            Iterator<Page> pages = db.iterator();
            assertTrue("The first distributed pagedb should have two pages, yet it has none.", pages.hasNext());
            Page page1 = pages.next();
            assertTrue("One of the pages in the first distributed pagedb is not expected: "+page1.getUrl(), page1.equals(page00) || page1.equals(page10));
            assertTrue("The first distributed pagedb should have two pages, yet it has one.", pages.hasNext());
            Page page2 = pages.next();
            assertTrue("One of the pages in the first distributed pagedb is not expected: "+page2.getUrl(), page2.equals(page00) || page2.equals(page10));
            assertFalse("One of the pages in the first distributed pagedb has been cloned:"+page1.getUrl(), page1.equals(page2));
            assertFalse("The first distributed pagedb should have two pages, yet it has more.", pages.hasNext());
            db.close();

            db = new PageDB(dbNames[1]);
            db.open(DPageDB.READ);
            pages = db.iterator();
            assertTrue("The second distributed pagedb should have two pages, yet it has none.", pages.hasNext());
            page1 = pages.next();
            assertTrue("One of the pages in the second distributed pagedb is not expected: "+page1.getUrl(), page1.equals(page01) || page1.equals(page11));
            assertTrue("The second distributed pagedb should have two pages, yet it has one.", pages.hasNext());
            page2 = pages.next();
            assertTrue("One of the pages in the second distributed pagedb is not expected: "+page2.getUrl(), page2.equals(page01) || page2.equals(page11));
            assertFalse("One of the pages in the second distributed pagedb has been cloned:"+page1.getUrl(), page1.equals(page2));
            assertFalse("The second pagedb should have two pages, yet it has more.", pages.hasNext());
            db.close();
        }
    }
View Full Code Here

            // Create the file
            String url = "http://localhost:8086/test-"+i+".html";
            String text = TestUtils.randomText(25,25);
            TestUtils.writeFile(tmpDir+"/web/test-"+i+".html", text);
            // Add the page to the fetchlist
            Page page = PageTest.randomPage();
            page.setUrl(url);
            fetchlist.addPage(page);
            // Store the data for later recall
            testPages.put(url,text);
        }
View Full Code Here

    }
   
    @TestInfo(testType = TestInfo.TestType.UNIT)
    public void testInternalProcess() throws Exception {
        LoggerModule lm= new LoggerModule(moduleName,globalConfig);
        Page page= new Page("http://loggerModuleTest.url.com", 0);
        FetchDocument doc= new FetchDocument(page);
        doc.addAttribute("attr1", "val1");
        doc.addTag("tag1");
        doc.addCategory("cat1");       
        lm.internalProcess(doc);
View Full Code Here

        FileUtil.deleteFile(tmpConfig);
    }

    @TestInfo(testType = TestInfo.TestType.UNIT)
    public void testBelow() throws Exception {       
        FetchDocument doc= new FetchDocument(new Page("",1f));       
        MockThresholdModule thMod= new MockThresholdModule(THRESH_PROP, globalConfig);

        thMod.setReturnedValue(thMod.thresholdValue - 0.1); // below
        for (String tag : thMod.onBelowUnsetTag){
            if (tag.length() < 1){// if empty, tag="";
View Full Code Here

       
    }

    @TestInfo(testType = TestInfo.TestType.UNIT)
    public void testAbove() throws Exception {       
        FetchDocument doc= new FetchDocument(new Page("",1f));       
        MockThresholdModule thMod= new MockThresholdModule(THRESH_PROP, globalConfig);

        thMod.setReturnedValue(thMod.thresholdValue + 0.1); // above
        for (String tag : thMod.onAboveUnsetTag){
            if (tag.length() < 1){// if empty, tag="";
View Full Code Here

    @TestInfo(testType = TestInfo.TestType.UNIT)
    public void testDeletesEveryCategory() throws Exception {

        moduleConfig.set("categories.order","sites,forums");
        AProcessorModule module = new OverrideCategoryModule(moduleName,globalConfig);
        FetchDocument doc = new FetchDocument(new Page("",1f));
        doc.addCategory("sites");
        doc.addCategory("forums");
        doc.addCategory("nutrition");
        module.process(doc);
View Full Code Here

    @TestInfo(testType = TestInfo.TestType.UNIT)
    public void testLeavesMostImportant() throws Exception {
        moduleConfig.set("categories.order","sites,forums");
        AProcessorModule module = new OverrideCategoryModule(moduleName,globalConfig);
        FetchDocument doc = new FetchDocument(new Page("",1f));
        doc.addCategory("sites");
        doc.addCategory("forums");
        module.process(doc);
       
        assertTrue("categories where " + doc.getCategories() + ". expected forums.",doc.getCategories().size() == 1);
View Full Code Here

    @TestInfo(testType = TestInfo.TestType.UNIT)
    public void testLeavesOnlyCategory() throws Exception {
        moduleConfig.set("categories.order","sites,forums");
        AProcessorModule module = new OverrideCategoryModule(moduleName,globalConfig);
        FetchDocument doc = new FetchDocument(new Page("",1f));
        doc.addCategory("sites");
        module.process(doc);
       
        assertTrue("categories where " + doc.getCategories() + ". expected sites.",doc.getCategories().size() == 1);
        assertTrue("categories where " + doc.getCategories() + ". expected sites.",doc.getCategories().contains("sites"));
View Full Code Here

TOP

Related Classes of com.flaptor.hounder.crawler.pagedb.Page

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.