Examples of makePath()


Examples of bixo.config.BixoPlatform.makePath()

        long fetchTime = System.currentTimeMillis();
        PartitioningKey groupingKey = new PartitioningKey("key", 1);
        FetchSetDatum pfd = new FetchSetDatum(urls, fetchTime, 1000, groupingKey.getValue(), groupingKey.getRef());
       
        BixoPlatform platform = new BixoPlatform(ScoredUrlDatumTest.class, platformMode);
        BasePath path = platform.makePath("build/test/ScoredUrlDatumTest/testCascadingSerialization/in");
        Tap in = platform.makeTap(platform.makeBinaryScheme(FetchSetDatum.FIELDS), path, SinkMode.REPLACE);
        TupleEntryCollector write = in.openForWrite(platform.makeFlowProcess());
        write.add(pfd.getTuple());
        write.close();
    }
View Full Code Here

Examples of bixo.config.BixoPlatform.makePath()

            curLoopDirPath = CrawlDirUtils.makeLoopDir(platform, baseDirPath, 2);

            flow = DemoCrawlWorkflow.createFlow(curLoopDirPath, crawlDbPath, defaultPolicy, userAgent, urlFilter, options);
            flow.complete();
            // Update crawldb path
            crawlDbPath = platform.makePath(curLoopDirPath, CrawlConfig.CRAWLDB_SUBDIR_NAME);

            crawldbTap = platform.makeTap(platform.makeBinaryScheme(CrawlDbDatum.FIELDS), crawlDbPath);
            iter = crawldbTap.openForRead(platform.makeFlowProcess());

            numFetched = 0;
View Full Code Here

Examples of bixo.config.BixoPlatform.makePath()

        options.setWorkingDir(WORKING_DIR);
        options.setAgentName("test-agent");
        options.setLocalPlatformMode(true);
       
        BixoPlatform platform = new BixoPlatform(DemoWebMiningWorkflowTest.class, options.getPlatformMode());
        BasePath workingDirPath = platform.makePath(WORKING_DIR);
        DemoWebMiningTool.setupWorkingDir(platform, workingDirPath, "/test-seedurls.txt");
       
        BasePath latestDirPath = CrawlDirUtils.findLatestLoopDir(platform, workingDirPath);
        BasePath crawlDbPath = platform.makePath(latestDirPath, CrawlConfig.CRAWLDB_SUBDIR_NAME);
       
View Full Code Here

Examples of bixo.config.BixoPlatform.makePath()

        BixoPlatform platform = new BixoPlatform(DemoWebMiningWorkflowTest.class, options.getPlatformMode());
        BasePath workingDirPath = platform.makePath(WORKING_DIR);
        DemoWebMiningTool.setupWorkingDir(platform, workingDirPath, "/test-seedurls.txt");
       
        BasePath latestDirPath = CrawlDirUtils.findLatestLoopDir(platform, workingDirPath);
        BasePath crawlDbPath = platform.makePath(latestDirPath, CrawlConfig.CRAWLDB_SUBDIR_NAME);
       
        FetcherPolicy fetcherPolicy = new FetcherPolicy();
        fetcherPolicy.setCrawlDelay(CrawlConfig.DEFAULT_CRAWL_DELAY);
        fetcherPolicy.setMaxContentSize(CrawlConfig.MAX_CONTENT_SIZE);
        fetcherPolicy.setFetcherMode(FetcherMode.EFFICIENT);
View Full Code Here

Examples of bixo.config.BixoPlatform.makePath()

            Flow flow = DemoWebMiningWorkflow.createWebMiningWorkflow(platform, crawlDbPath, curLoopDirPath, fetcherPolicy, userAgent, options);
            flow.complete();
       
            // validate
            BasePath statusPath = platform.makePath(curLoopDirPath, CrawlConfig.STATUS_SUBDIR_NAME);
            validateEntryCount(platform, statusPath, null, 1, "status", true);
   
            BasePath contentPath = platform.makePath(curLoopDirPath, CrawlConfig.CONTENT_SUBDIR_NAME);
            validateEntryCount(platform, contentPath, FetchedDatum.FIELDS, 1, "content", false);
View Full Code Here

Examples of bixo.config.BixoPlatform.makePath()

       
            // validate
            BasePath statusPath = platform.makePath(curLoopDirPath, CrawlConfig.STATUS_SUBDIR_NAME);
            validateEntryCount(platform, statusPath, null, 1, "status", true);
   
            BasePath contentPath = platform.makePath(curLoopDirPath, CrawlConfig.CONTENT_SUBDIR_NAME);
            validateEntryCount(platform, contentPath, FetchedDatum.FIELDS, 1, "content", false);

            crawlDbPath = platform.makePath(curLoopDirPath, CrawlConfig.CRAWLDB_SUBDIR_NAME);
            validateEntryCount(platform, crawlDbPath, null, 3, "crawldb", true);
           
View Full Code Here

Examples of bixo.config.BixoPlatform.makePath()

            validateEntryCount(platform, statusPath, null, 1, "status", true);
   
            BasePath contentPath = platform.makePath(curLoopDirPath, CrawlConfig.CONTENT_SUBDIR_NAME);
            validateEntryCount(platform, contentPath, FetchedDatum.FIELDS, 1, "content", false);

            crawlDbPath = platform.makePath(curLoopDirPath, CrawlConfig.CRAWLDB_SUBDIR_NAME);
            validateEntryCount(platform, crawlDbPath, null, 3, "crawldb", true);
           
            // run the second loop
            curLoopDirPath =  CrawlDirUtils.makeLoopDir(platform, workingDirPath, 2);
            flow = DemoWebMiningWorkflow.createWebMiningWorkflow(platform, crawlDbPath, curLoopDirPath, fetcherPolicy, userAgent, options);
View Full Code Here

Examples of bixo.config.BixoPlatform.makePath()

            Pipe pipe = new Pipe("urls");
            pipe = new Each(pipe, new UrlLengthener(fetcher));
            pipe = new Each(pipe, new Debug());

            BixoPlatform platform = new BixoPlatform(LengthenUrlsTool.class, Platform.Local);
            BasePath filePath = platform.makePath(filename);
            TextLine textLineLocalScheme = new TextLine(new Fields("url"));
            Tap sourceTap = platform.makeTap(textLineLocalScheme, filePath, SinkMode.KEEP);
            SinkTap sinkTap = new NullSinkTap(new Fields("url"));
           
            FlowConnector flowConnector = platform.makeFlowConnector();
View Full Code Here

Examples of bixo.config.BixoPlatform.makePath()

            curLoopDirPath =  CrawlDirUtils.makeLoopDir(platform, workingDirPath, 2);
            flow = DemoWebMiningWorkflow.createWebMiningWorkflow(platform, crawlDbPath, curLoopDirPath, fetcherPolicy, userAgent, options);
            flow.complete();
           
            // validate
            statusPath = platform.makePath(curLoopDirPath, CrawlConfig.STATUS_SUBDIR_NAME);
            validateEntryCount(platform, statusPath, null, 2, "status", true);
   
            contentPath = platform.makePath(curLoopDirPath, CrawlConfig.CONTENT_SUBDIR_NAME);
            validateEntryCount(platform, contentPath, FetchedDatum.FIELDS, 2, "content", false);
View Full Code Here

Examples of bixo.config.BixoPlatform.makePath()

           
            // validate
            statusPath = platform.makePath(curLoopDirPath, CrawlConfig.STATUS_SUBDIR_NAME);
            validateEntryCount(platform, statusPath, null, 2, "status", true);
   
            contentPath = platform.makePath(curLoopDirPath, CrawlConfig.CONTENT_SUBDIR_NAME);
            validateEntryCount(platform, contentPath, FetchedDatum.FIELDS, 2, "content", false);

            crawlDbPath = platform.makePath(curLoopDirPath, CrawlConfig.CRAWLDB_SUBDIR_NAME);
            validateEntryCount(platform, crawlDbPath, null, 8, "crawldb", true);
            assertTrue(validatePageScores(platform, crawlDbPath));
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by Oracle Inc. Contact coftware#gmail.com.