Examples of makePath()


Examples of bixo.config.BixoPlatform.makePath()

        // For now we only do it when we are running in Hadoop mode
          Tap writableSeqFileSink = null;
          Pipe writableSeqFileDataPipe = null;
            if (!options.isLocalPlatformMode()) {
                writableSeqFileDataPipe = new Pipe("writable seqfile data", new Each(parsePipe.getTailPipe(), new CreateWritableSeqFileData()));
                BasePath writableSeqFileDataPath = platform.makePath(curWorkingDirPath, CrawlConfig.EXTRACTED_TEXT_SUBDIR_NAME);
                WritableSequenceFile writableSeqScheme = new WritableSequenceFile(new Fields(CrawlConfig.WRITABLE_SEQ_FILE_KEY_FN, CrawlConfig.WRITABLE_SEQ_FILE_VALUE_FN), Text.class, Text.class);
                writableSeqFileSink = platform.makeTap(writableSeqScheme, writableSeqFileDataPath, SinkMode.REPLACE);
            }
       
        Pipe urlFromOutlinksPipe = new Pipe("url from outlinks", parsePipe.getTailPipe());
View Full Code Here

Examples of bixo.config.BixoPlatform.makePath()

        String outputDirName = options.getOutputDir();

        try {
            BixoPlatform platform = new BixoPlatform(AnalyzeEmail.class, options.getPlatformMode());
            // Create the input (source tap), which is just a text file reader
            BasePath inputPath = platform.makePath(inputFileName);
            Tap sourceTap = platform.makeTap(platform.makeTextScheme(), inputPath);
           
            // Create the sub-assembly that runs the fetch job
            UserAgent userAgent = new UserAgent(options.getAgentName(), EMAIL_ADDRESS, WEB_ADDRESS);
            Pipe importPipe = new Each("url importer", new Fields("line"), new LoadUrlFunction());
View Full Code Here

Examples of bixo.config.BixoPlatform.makePath()

           
            // And let's sort in reverse order (high to low score)
            analysisPipe = new GroupBy(analysisPipe, new Fields(FieldNames.SUMMED_SCORE), true);

            // Create the sink taps
            BasePath outputPath = platform.makePath(outputDirName);
            Tap pageStatusSinkTap = platform.makeTap(platform.makeTextScheme(),
                            platform.makePath(outputPath, "page-status"), SinkMode.REPLACE);
            Tap mboxStatusSinkTap = platform.makeTap(platform.makeTextScheme(),
                            platform.makePath(outputPath, "mbox-status"), SinkMode.REPLACE);
            Tap contentSinkTap = platform.makeTap(platform.makeBinaryScheme(FetchedDatum.FIELDS),
View Full Code Here

Examples of bixo.config.BixoPlatform.makePath()

            analysisPipe = new GroupBy(analysisPipe, new Fields(FieldNames.SUMMED_SCORE), true);

            // Create the sink taps
            BasePath outputPath = platform.makePath(outputDirName);
            Tap pageStatusSinkTap = platform.makeTap(platform.makeTextScheme(),
                            platform.makePath(outputPath, "page-status"), SinkMode.REPLACE);
            Tap mboxStatusSinkTap = platform.makeTap(platform.makeTextScheme(),
                            platform.makePath(outputPath, "mbox-status"), SinkMode.REPLACE);
            Tap contentSinkTap = platform.makeTap(platform.makeBinaryScheme(FetchedDatum.FIELDS),
                            platform.makePath(outputPath, "content"), SinkMode.REPLACE);
            Tap analyzerSinkTap = platform.makeTap(platform.makeTextScheme(),
View Full Code Here

Examples of bixo.config.BixoPlatform.makePath()

            // Create the sink taps
            BasePath outputPath = platform.makePath(outputDirName);
            Tap pageStatusSinkTap = platform.makeTap(platform.makeTextScheme(),
                            platform.makePath(outputPath, "page-status"), SinkMode.REPLACE);
            Tap mboxStatusSinkTap = platform.makeTap(platform.makeTextScheme(),
                            platform.makePath(outputPath, "mbox-status"), SinkMode.REPLACE);
            Tap contentSinkTap = platform.makeTap(platform.makeBinaryScheme(FetchedDatum.FIELDS),
                            platform.makePath(outputPath, "content"), SinkMode.REPLACE);
            Tap analyzerSinkTap = platform.makeTap(platform.makeTextScheme(),
                            platform.makePath(outputPath, "analysis"), SinkMode.REPLACE);
View Full Code Here

Examples of bixo.config.BixoPlatform.makePath()

            Tap pageStatusSinkTap = platform.makeTap(platform.makeTextScheme(),
                            platform.makePath(outputPath, "page-status"), SinkMode.REPLACE);
            Tap mboxStatusSinkTap = platform.makeTap(platform.makeTextScheme(),
                            platform.makePath(outputPath, "mbox-status"), SinkMode.REPLACE);
            Tap contentSinkTap = platform.makeTap(platform.makeBinaryScheme(FetchedDatum.FIELDS),
                            platform.makePath(outputPath, "content"), SinkMode.REPLACE);
            Tap analyzerSinkTap = platform.makeTap(platform.makeTextScheme(),
                            platform.makePath(outputPath, "analysis"), SinkMode.REPLACE);

            HashMap<String, Tap> sinkTapMap = new HashMap<String, Tap>(2);
            sinkTapMap.put(MBOX_PAGE_STATUS_PIPE_NAME, pageStatusSinkTap);
View Full Code Here

Examples of bixo.config.BixoPlatform.makePath()

            Tap mboxStatusSinkTap = platform.makeTap(platform.makeTextScheme(),
                            platform.makePath(outputPath, "mbox-status"), SinkMode.REPLACE);
            Tap contentSinkTap = platform.makeTap(platform.makeBinaryScheme(FetchedDatum.FIELDS),
                            platform.makePath(outputPath, "content"), SinkMode.REPLACE);
            Tap analyzerSinkTap = platform.makeTap(platform.makeTextScheme(),
                            platform.makePath(outputPath, "analysis"), SinkMode.REPLACE);

            HashMap<String, Tap> sinkTapMap = new HashMap<String, Tap>(2);
            sinkTapMap.put(MBOX_PAGE_STATUS_PIPE_NAME, pageStatusSinkTap);
            sinkTapMap.put(FetchPipe.STATUS_PIPE_NAME, mboxStatusSinkTap);
            sinkTapMap.put(SPLITTER_PIPE_NAME, contentSinkTap);
View Full Code Here

Examples of bixo.config.BixoPlatform.makePath()

    public void testNotLosingFetchedUrls() throws Throwable {
        String baseDirName = "build/test/DemoCrawlWorkflowLRTest/output";
       
        BixoPlatform platform = new BixoPlatform(DemoCrawlWorkflowLRTest.class, Platform.Local);
       
        BasePath baseDirPath = platform.makePath(baseDirName);
        baseDirPath.delete(true);
        BasePath curLoopDirPath = CrawlDirUtils.makeLoopDir(platform, baseDirPath, 0);
        BasePath crawlDbPath = platform.makePath(curLoopDirPath, CrawlConfig.CRAWLDB_SUBDIR_NAME);

        DemoCrawlTool.importOneDomain(platform, "localhost:8089", crawlDbPath);
View Full Code Here

Examples of bixo.config.BixoPlatform.makePath()

        BixoPlatform platform = new BixoPlatform(DemoCrawlWorkflowLRTest.class, Platform.Local);
       
        BasePath baseDirPath = platform.makePath(baseDirName);
        baseDirPath.delete(true);
        BasePath curLoopDirPath = CrawlDirUtils.makeLoopDir(platform, baseDirPath, 0);
        BasePath crawlDbPath = platform.makePath(curLoopDirPath, CrawlConfig.CRAWLDB_SUBDIR_NAME);

        DemoCrawlTool.importOneDomain(platform, "localhost:8089", crawlDbPath);
        curLoopDirPath = CrawlDirUtils.makeLoopDir(platform, baseDirPath, 1);

        FetcherPolicy defaultPolicy = new FetcherPolicy();
View Full Code Here

Examples of bixo.config.BixoPlatform.makePath()

            server = startServer(new FakeWebSiteHandler(), 8089);
            Flow flow = DemoCrawlWorkflow.createFlow(curLoopDirPath, crawlDbPath, defaultPolicy, userAgent, urlFilter, options);
            flow.complete();

            // Update the crawlDb path
            crawlDbPath = platform.makePath(curLoopDirPath, CrawlConfig.CRAWLDB_SUBDIR_NAME);

            // Now we should have an output/1-<timestamp>/ directory, where the
            // /urls dir has 11 entries with
            // one being previously crawled, and the other 10 being pending.
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.