Package bixo.config

Examples of bixo.config.BixoPlatform.makePath()


            FetchPipe fetchPipe = new FetchPipe(importPipe, scorer, fetcher, 1);

            // Create the output, which is a dual file sink tap.
            String output = "build/test/RunFakeFetchPipe/dual";
            BasePath outputPath = platform.makePath(output);
            BasePath statusPath = platform.makePath(outputPath, "status");
            Tap status = platform.makeTap(platform.makeTextScheme(), statusPath, SinkMode.REPLACE);

            BasePath contentPath = platform.makePath(outputPath, "content");
            Tap content = platform.makeTap(platform.makeTextScheme(), contentPath, SinkMode.REPLACE);
           
View Full Code Here


            String output = "build/test/RunFakeFetchPipe/dual";
            BasePath outputPath = platform.makePath(output);
            BasePath statusPath = platform.makePath(outputPath, "status");
            Tap status = platform.makeTap(platform.makeTextScheme(), statusPath, SinkMode.REPLACE);

            BasePath contentPath = platform.makePath(outputPath, "content");
            Tap content = platform.makeTap(platform.makeTextScheme(), contentPath, SinkMode.REPLACE);
           
            // Finally we can run it.
            FlowConnector flowConnector = platform.makeFlowConnector();
            Flow flow = flowConnector.connect(in, FetchPipe.makeSinkMap(status, content), fetchPipe);
View Full Code Here

        BixoPlatform platform = new BixoPlatform(ParsePipeTest.class, Platform.Local);
       

        Pipe pipe = new Pipe("parse_source");
        ParsePipe parserPipe = new ParsePipe(pipe, new SimpleParser());
        BasePath inputPath = platform.makePath("build/test/ParserPipeTest/in");
        Tap in = platform.makeTap(platform.makeBinaryScheme(FetchedDatum.FIELDS), inputPath);
        BasePath outputPath = platform.makePath("build/test/ParserPipeTest/out");
        Tap out = platform.makeTap(platform.makeBinaryScheme(ParsedDatum.FIELDS), outputPath, SinkMode.REPLACE);

        TupleEntryCollector write = in.openForWrite(platform.makeFlowProcess());
View Full Code Here

        Pipe pipe = new Pipe("parse_source");
        ParsePipe parserPipe = new ParsePipe(pipe, new SimpleParser());
        BasePath inputPath = platform.makePath("build/test/ParserPipeTest/in");
        Tap in = platform.makeTap(platform.makeBinaryScheme(FetchedDatum.FIELDS), inputPath);
        BasePath outputPath = platform.makePath("build/test/ParserPipeTest/out");
        Tap out = platform.makeTap(platform.makeBinaryScheme(ParsedDatum.FIELDS), outputPath, SinkMode.REPLACE);

        TupleEntryCollector write = in.openForWrite(platform.makeFlowProcess());

        ArchiveReader archiveReader = ArchiveReaderFactory.get("src/test/resources/someHtml.arc");
View Full Code Here

        // Build and run the flow.
       
        try {
            BixoPlatform platform = new BixoPlatform(DemoWebMiningTool.class, options.getPlatformMode());
            BasePath workingDirPath = platform.makePath(options.getWorkingDir());

            setupWorkingDir(platform, workingDirPath, CrawlConfig.SEED_URLS_FILENAME);
            BasePath latestDirPath = CrawlDirUtils.findLatestLoopDir(platform, workingDirPath);
            if (latestDirPath == null) {
View Full Code Here

            BasePath latestDirPath = CrawlDirUtils.findLatestLoopDir(platform, workingDirPath);
            if (latestDirPath == null) {
                error("No previous cycle output dirs exist in " + workingDirPath, parser);
            }
           
            BasePath crawlDbPath = platform.makePath(latestDirPath, CrawlConfig.CRAWLDB_SUBDIR_NAME);
           
            UserAgent userAgent = new UserAgent(options.getAgentName(), CrawlConfig.EMAIL_ADDRESS, CrawlConfig.WEB_ADDRESS);
           
            FetcherPolicy fetcherPolicy = new FetcherPolicy();
            fetcherPolicy.setCrawlDelay(CrawlConfig.DEFAULT_CRAWL_DELAY);
View Full Code Here

                BasePath curLoopDirPath = CrawlDirUtils.makeLoopDir(platform, workingDirPath, curLoop);
                Flow flow = DemoWebMiningWorkflow.createWebMiningWorkflow(platform, crawlDbPath, curLoopDirPath, fetcherPolicy, userAgent, options);
                flow.complete();

                // Update crawlDbPath to point to the latest crawl db
                crawlDbPath = platform.makePath(curLoopDirPath, CrawlConfig.CRAWLDB_SUBDIR_NAME);
            }
           
        } catch (Exception e) {
            System.err.println("Exception running job: " + e.getMessage());
            e.printStackTrace(System.err);
View Full Code Here

       
        try {
            BixoPlatform platform = new BixoPlatform(AnalyzeMbox.class, options.getPlatformMode());
            // Create the input (source tap), which is just a sequence file reader. We assume
          // that the file already has the results of splitting the mbox file into emails.
            BasePath inputPath = platform.makePath(inputFileName);
            platform.assertPathExists(inputPath, "input file");
            Tap sourceTap = platform.makeTap(platform.makeBinaryScheme(FetchedDatum.FIELDS), inputPath);
           
            Pipe pipe = new Pipe("Email Analyzer");
            pipe = new Each(pipe, new ParseEmailFunction());
View Full Code Here

            // And let's sort in reverse order (high to low score)
            pipe = new GroupBy(pipe, new Fields(FieldNames.SUMMED_SCORE), true);

            // Create the output (sink tap)
            Tap sinkTap = platform.makeTap(platform.makeTextScheme(),
                            platform.makePath(outputDirName), SinkMode.REPLACE);
           
            // Finally we can run it.
            FlowConnector flowConnector = platform.makeFlowConnector();
            Flow flow = flowConnector.connect(sourceTap, sinkTap, pipe);
            flow.complete();
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.