Examples of makeTap()


Examples of bixo.config.BixoPlatform.makeTap()

        PartitioningKey groupingKey = new PartitioningKey("key", 1);
        FetchSetDatum pfd = new FetchSetDatum(urls, fetchTime, 1000, groupingKey.getValue(), groupingKey.getRef());
       
        BixoPlatform platform = new BixoPlatform(ScoredUrlDatumTest.class, platformMode);
        BasePath path = platform.makePath("build/test/ScoredUrlDatumTest/testCascadingSerialization/in");
        Tap in = platform.makeTap(platform.makeBinaryScheme(FetchSetDatum.FIELDS), path, SinkMode.REPLACE);
        TupleEntryCollector write = in.openForWrite(platform.makeFlowProcess());
        write.add(pfd.getTuple());
        write.close();
    }
}
View Full Code Here

Examples of bixo.config.BixoPlatform.makeTap()

            flow = DemoCrawlWorkflow.createFlow(curLoopDirPath, crawlDbPath, defaultPolicy, userAgent, urlFilter, options);
            flow.complete();
            // Update crawldb path
            crawlDbPath = platform.makePath(curLoopDirPath, CrawlConfig.CRAWLDB_SUBDIR_NAME);

            crawldbTap = platform.makeTap(platform.makeBinaryScheme(CrawlDbDatum.FIELDS), crawlDbPath);
            iter = crawldbTap.openForRead(platform.makeFlowProcess());

            numFetched = 0;
            numPending = 0;
            int numDepth0 = 0;
View Full Code Here

Examples of bixo.config.BixoPlatform.makeTap()

            pipe = new Each(pipe, new Debug());

            BixoPlatform platform = new BixoPlatform(LengthenUrlsTool.class, Platform.Local);
            BasePath filePath = platform.makePath(filename);
            TextLine textLineLocalScheme = new TextLine(new Fields("url"));
            Tap sourceTap = platform.makeTap(textLineLocalScheme, filePath, SinkMode.KEEP);
            SinkTap sinkTap = new NullSinkTap(new Fields("url"));
           
            FlowConnector flowConnector = platform.makeFlowConnector();
            Flow flow = flowConnector.connect(sourceTap, sinkTap, pipe);
View Full Code Here

Examples of bixo.config.BixoPlatform.makeTap()

       
        createDataFile(platform, unfetchedDatumsPath, unfetchedDatums);

       
        // create a workflow
        Tap inputSource1 = platform.makeTap(platform.makeBinaryScheme(UrlDatum.FIELDS), fetchedDatumsPath);
        Pipe fetchedPipe = new Pipe("fetched");
        Tap inputSource2 = platform.makeTap(platform.makeBinaryScheme(UrlDatum.FIELDS), unfetchedDatumsPath);
        Pipe unfetchedPipe = new Pipe("unfetched");

        Map<String, Tap> sources = new HashMap<String, Tap>();
View Full Code Here

Examples of bixo.config.BixoPlatform.makeTap()

       
        // create a workflow
        Tap inputSource1 = platform.makeTap(platform.makeBinaryScheme(UrlDatum.FIELDS), fetchedDatumsPath);
        Pipe fetchedPipe = new Pipe("fetched");
        Tap inputSource2 = platform.makeTap(platform.makeBinaryScheme(UrlDatum.FIELDS), unfetchedDatumsPath);
        Pipe unfetchedPipe = new Pipe("unfetched");

        Map<String, Tap> sources = new HashMap<String, Tap>();
        sources.put(fetchedPipe.getName(), inputSource1);
        sources.put(unfetchedPipe.getName(), inputSource2);
View Full Code Here

Examples of bixo.config.BixoPlatform.makeTap()

        Map<String, Tap> sources = new HashMap<String, Tap>();
        sources.put(fetchedPipe.getName(), inputSource1);
        sources.put(unfetchedPipe.getName(), inputSource2);

        BasePath resultsPath = platform.makePath(workingDirPath, "results");
        Tap resultSink = platform.makeTap(platform.makeBinaryScheme(UrlDatum.FIELDS), resultsPath, SinkMode.REPLACE);

        Pipe resultsPipe = new GroupBy("results pipe", Pipe.pipes(fetchedPipe, unfetchedPipe),
                        new Fields(UrlDatum.URL_FN));
        resultsPipe = new Every(resultsPipe, new LatestUrlDatumBuffer(), Fields.RESULTS);
View Full Code Here

Examples of bixo.config.BixoPlatform.makeTap()

        // Input : the crawldb
        platform.assertPathExists(crawlDbPath, "CrawlDb doesn't exist");

        // Our crawl db is defined by the CrawlDbDatum
        Tap inputSource = platform.makeTap(platform.makeBinaryScheme(CrawlDbDatum.FIELDS), crawlDbPath);
        Pipe importPipe = new Pipe("import pipe");

        // Split into tuples that are to be fetched and that have already been fetched
        SplitterAssembly splitter = new SplitterAssembly(importPipe, new SplitFetchedUnfetchedCrawlDatums());
View Full Code Here

Examples of bixo.config.BixoPlatform.makeTap()

        Flow flow = flowConnector.connect(sources, resultSink, resultsPipe);
        flow.complete();
       
        // verify that the resulting pipe has the latest tuple
       
        Tap testSink = platform.makeTap(platform.makeBinaryScheme(UrlDatum.FIELDS), resultsPath);
        TupleEntryIterator reader = testSink.openForRead(platform.makeFlowProcess());
        int count = 0;
        long latest = 0;
        while (reader.hasNext()) {
            TupleEntry next = reader.next();
View Full Code Here

Examples of bixo.config.BixoPlatform.makeTap()

        //      crawldb
        //      content
        //      parse
        //      status
        BasePath outCrawlDbPath = platform.makePath(curWorkingDirPath, CrawlConfig.CRAWLDB_SUBDIR_NAME);
        Tap loopCrawldbSink = platform.makeTap(platform.makeBinaryScheme(CrawlDbDatum.FIELDS), outCrawlDbPath, SinkMode.REPLACE);

        BasePath contentDirPath = platform.makePath(curWorkingDirPath, CrawlConfig.CONTENT_SUBDIR_NAME);
        Tap contentSink = platform.makeTap(platform.makeBinaryScheme(FetchedDatum.FIELDS), contentDirPath, SinkMode.REPLACE);

        BasePath parseDirPath = platform.makePath(curWorkingDirPath, CrawlConfig.PARSE_SUBDIR_NAME);
View Full Code Here

Examples of bixo.config.BixoPlatform.makeTap()

        //      status
        BasePath outCrawlDbPath = platform.makePath(curWorkingDirPath, CrawlConfig.CRAWLDB_SUBDIR_NAME);
        Tap loopCrawldbSink = platform.makeTap(platform.makeBinaryScheme(CrawlDbDatum.FIELDS), outCrawlDbPath, SinkMode.REPLACE);

        BasePath contentDirPath = platform.makePath(curWorkingDirPath, CrawlConfig.CONTENT_SUBDIR_NAME);
        Tap contentSink = platform.makeTap(platform.makeBinaryScheme(FetchedDatum.FIELDS), contentDirPath, SinkMode.REPLACE);

        BasePath parseDirPath = platform.makePath(curWorkingDirPath, CrawlConfig.PARSE_SUBDIR_NAME);
        Tap parseSink = platform.makeTap(platform.makeBinaryScheme(ParsedDatum.FIELDS), parseDirPath, SinkMode.REPLACE);

        BasePath statusDirPath = platform.makePath(curWorkingDirPath, CrawlConfig.STATUS_SUBDIR_NAME);
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.