Package org.apache.any23.configuration

Examples of org.apache.any23.configuration.Configuration


    public static Communicator getCommunicator() throws Exception {
        if (communicator == null) {
            Configuration.loadConfiguration();
            //get lodead configuration
            Configuration config = Configuration.getCurrentConfiguration();
            //initializing communicator based on the configuration
            initializeCommunicator(config.getClusterConfiguration());
            int rmiPort = Configuration.getCurrentConfiguration().getCurrentNodeConfiguration().getRmiRepositoryPort();
            Communicator.getCommunicator().initializeCommunicatorServer(rmiPort);
        }
        return communicator;
    }


    }

    public static void main(String[] args)
    {
        final DownloadReport report = new DownloadReport();
        Configuration conf = new Configuration();
        if (parseArgs(args, conf))
        {
            try
            {
                /* Parse the configuration xml file */
                conf.loadFile((String) conf.get("config_file"));

                /* Load the plugins */
                PluginLoader loader = new PluginLoader((String)(conf.get(Configuration.PLUGIN_FOLDER_TAG)));
                loader.load();

                /* Get the subreddits */
                List<String> subreddits = (List<String>)conf.get(Configuration.SUBREDDITS_ALLOWED_TAG);

                /* Get number of download threads and create the thread pool */
                String downloadThreads = (String)conf.get(Configuration.PARALLEL_DOWNLOAD_TAG);
                int numberOfThreads = downloadThreads == null ? 10 : Integer.valueOf(downloadThreads);
                ExecutorService threadPool = Executors.newFixedThreadPool(numberOfThreads);

                /* Connect to reddit and get the content from the source given */
                System.out.println("Connecting to reddit");
                RedditRequest reddit = new RedditRequest((String) conf.get("reddit_token"));

                System.out.println("Fetching results");

                String sourceUrl = (String) conf.get("source_url");

                String countStr = (String) conf.get("count");
                int count = countStr == null ? 25 : Integer.valueOf(countStr);
                int numberOfBatchs = count / 100 + 1;
                String lastFetched = null;

                for (int i = 0; i < numberOfBatchs; ++i)

    private ByteArrayOutputStream byteOutStream = new ByteArrayOutputStream();

    public WebResponder(Servlet any23servlet, HttpServletResponse response) {
        this.any23servlet = any23servlet;
        this.response = response;
        this.runner = new Any23();
        runner.setHTTPUserAgent("Any23-Servlet");
    }

            String format,
            boolean report, boolean annotate
    ) throws IOException {
        if (in == null) return;
        if (!initRdfWriter(format, report, annotate)) return;
        final ExtractionReport er;
        try {
            er = runner.extract(eps, in, rdfWriter);
            rdfWriter.close();
            if (! er.hasMatchingExtractors() ) {
                sendError(
                        415,
                        "No suitable extractor found for this media type",
                        null,
                        er,
                        report
                );
                return;
            }
        } catch (IOException ioe) {
            // IO Error.
            if (ioe.getCause() != null && ValidatorException.class.equals(ioe.getCause().getClass())) {
                final String errMsg = "Could not fetch input, IO Error.";
                any23servlet.log(errMsg, ioe.getCause());
                sendError(502, errMsg, ioe, null, report);
                return;
            }
            any23servlet.log("Could not fetch input", ioe);
            sendError(502, "Could not fetch input.", ioe, null, report);
            return;
        } catch (ExtractionException e) {
            // Extraction error.
            any23servlet.log("Could not parse input", e);
            sendError(502, "Could not parse input.", e, null, report);
            return;
        } catch (Exception e) {
            any23servlet.log("Internal error", e);
            sendError(500, "Internal error.", e, null, report);
            return;
        }

        /* *** No triples found. *** */
        any23servlet.log("Extraction complete, " + reporter.getTotalTriples() + " triples");
        if (reporter.getTotalTriples() == 0) {
            sendError(
                    501,
                    "Extraction completed. No triples have been found.",
                    null,
                    er, report
            );
            return;
        }

        // Regular response.
        response.setContentType(outputMediaType);
        response.setStatus(200);
        // Set the output encoding equals to the input one.
        final String charsetEncoding = er.getEncoding();
        if (Charset.isSupported(charsetEncoding)) {
            response.setCharacterEncoding(er.getEncoding());
        } else {
            response.setCharacterEncoding("UTF-8");
        }

        final ServletOutputStream sos = response.getOutputStream();

     */
    @Test
    public void testDetectCLIPlugins() throws IOException {
        final Iterator<Tool> tools = manager.getApplicableTools(CRAWLER_TARGET_DIR, CRAWLER_DEPENDENCY_DIR);
        final Set<String> toolClasses = new HashSet<String>();
        Tool tool;
        while(tools.hasNext()) {
            tool = tools.next();
            assertTrue("Found duplicate tool.", toolClasses.add(tool.getClass().getName()));
        }
        assertTrue(
                String.format(
                        "Expected [%s] plugin be detected, but not found int the built classpath",
                        Crawler.class.getName()

                                                     );
        }

        reportingTripleHandler = new ReportingTripleHandler(tripleHandler);

        final Configuration configuration = DefaultConfiguration.singleton();
        extractionParameters =
                pedantic
                        ?
                new ExtractionParameters(configuration, ValidationMode.ValidateAndFix, nestingDisabled)
                        :

                                                     );
        }

        reportingTripleHandler = new ReportingTripleHandler(tripleHandler);

        final Configuration configuration = DefaultConfiguration.singleton();
        extractionParameters =
                pedantic
                        ?
                new ExtractionParameters(configuration, ValidationMode.ValidateAndFix, nestingDisabled)
                        :

        final CompositeTripleHandler cth = new CompositeTripleHandler();
        cth.addChild(rdfxmlWriter);
        cth.addChild(repositoryWriter);

        final ModifiableConfiguration configuration = DefaultConfiguration.copy();
        configuration.setProperty("any23.extraction.metadata.domain.per.entity", "on");
        SingleDocumentExtraction instance =  new SingleDocumentExtraction(
                configuration,
                new HTMLFixture(copyResourceToTempFile(file)).getOpener("http://nested.test.com"),
                extractorGroup,
                cth

        logger.debug(n3);
    }

    @Test
    public void testModifiableConfiguration_issue183() throws Exception {
        final ModifiableConfiguration modifiableConf = DefaultConfiguration.copy();
        modifiableConf.setProperty("any23.extraction.metadata.timesize", "off");
        final Any23 any23 = new Any23(modifiableConf);

        final String content = FileUtils.readResourceContent("/rdf/rdf-issue183.ttl");
        final DocumentSource source = new StringDocumentSource(content, "http://base.com");
        final ByteArrayOutputStream out = new ByteArrayOutputStream();

    public void testTypedLiteralIncompatibleValueSupport()
    throws IOException, ExtractionException, TripleHandlerException {
        final URI uri = RDFUtils.uri("http://host.com/test-malformed-literal.turtle");
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        final TripleHandler th = new RDFXMLWriter(baos);
        final ExtractionContext extractionContext = new ExtractionContext("turtle-extractor", uri);
        final ExtractionResult result = new ExtractionResultImpl(extractionContext, extractor, th);
        extractor.setStopAtFirstError(false);
        try {
            extractor.run(
                    ExtractionParameters.newDefault(),

TOP

Related Classes of org.apache.any23.configuration.Configuration

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.