Package org.apache.any23.source

Examples of org.apache.any23.source.DocumentSource


        // perform conversions

        try {
            final long start = System.currentTimeMillis();
            for (String inputURI : inputURIs) {
                DocumentSource source = any23.createDocumentSource(inputURI);

                performExtraction( source );
            }
            final long elapsed = System.currentTimeMillis() - start;
View Full Code Here


    public void run() throws Exception {
        if (document.isEmpty()) {
            throw new IllegalArgumentException("No input document URL specified");
        }

        final DocumentSource documentSource = document.get(0);
        final MIMETypeDetector detector = new TikaMIMETypeDetector();
        final MIMEType mimeType = detector.guessMIMEType(
                documentSource.getDocumentURI(),
                documentSource.openInputStream(),
                MIMEType.parse(documentSource.getContentType())
        );
        System.out.println(mimeType);
    }
View Full Code Here

      }else{
        runner = new Any23();
      }
      runner.setHTTPUserAgent("google-refine-rdf-extension");
      HTTPClient client = runner.getHTTPClient();
      DocumentSource source = new HTTPDocumentSource(client, url);
      Repository repository = new SailRepository(
          new ForwardChainingRDFSInferencer(new MemoryStore()));
      repository.initialize();
      RepositoryConnection con = repository.getConnection();
      RepositoryWriter w = new RepositoryWriter(con);
View Full Code Here

    private File getFile() {
        return file;
    }

    public DocumentSource getOpener(String baseURI) {
        return new FileDocumentSource(getFile(), baseURI);
    }
View Full Code Here

        assertContains("<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>", response.getContent());
    }

    @Test
    public void testResponseWithReport() throws Exception {
        content = new FileDocumentSource(
                new File("src/test/resources/org/apache/any23/servlet/missing-og-namespace.html")
        ).readStream();
        acceptHeader = "text/plain";
        HttpTester response = doGetRequest("/best/http://foo.com?validation-mode=validate-fix&report=on");
        Assert.assertEquals(200, response.getStatus());
View Full Code Here

        }
    }

    protected DocumentSource createHTTPDocumentSource(HTTPClient httpClient, String uri)
            throws IOException, URISyntaxException {
        return new HTTPDocumentSource(httpClient, uri);
    }
View Full Code Here

        @Override
        protected DocumentSource createHTTPDocumentSource(HTTPClient httpClient, String uri)
                throws IOException, URISyntaxException {
            requestedURI = uri;
            if(content != null) {
                return new StringDocumentSource(content, uri);
            } else {
                return super.createHTTPDocumentSource(httpClient, uri);
            }
        }
View Full Code Here

            if (req.getParameter("type") != null && !"".equals(req.getParameter("type"))) {
                type = req.getParameter("type");
            }
            log("Attempting conversion to '" + format + "' from body parameter");
            responder.runExtraction(
                    new StringDocumentSource(req.getParameter("body"), Servlet.DEFAULT_BASE_URI, type),
                    eps,
                    format,
                    report, annotate
            );
            return;
View Full Code Here

        ps.println("</issueReport>");

    }

    private void printReport(String msg, Throwable e, ExtractionReport er, PrintStream ps) {
        XMLValidationReportSerializer reportSerializer = new XMLValidationReportSerializer();
        ps.println("<report>");

        // Human readable error message.
        if(msg != null) {
            ps.printf("<message>%s</message>\n", msg);
        } else {
            ps.print("<message/>\n");
        }

        // Error stack trace.
        if(e != null) {
            ps.println("<error>");
            ps.println("<![CDATA[");
            e.printStackTrace(ps);
            ps.println("]]>");
            ps.println("</error>");
        } else {
            ps.println("<error/>");
        }

        // Issue Report.
        printIssueReport(er, ps);

        // Validation report.
        try {
            reportSerializer.serialize(er.getValidationReport(), ps);
        } catch (SerializationException se) {
            ps.println("An error occurred while serializing error.");
            se.printStackTrace(ps);
        }
        ps.println("</report>");
View Full Code Here

        return new CSVExtractorFactory();
    }

    @Test
    public void testExtractionCommaSeparated() throws RepositoryException {
        CSV csv = CSV.getInstance();
        assertExtract("/org/apache/any23/extractor/csv/test-comma.csv");
        logger.debug(dumpModelToRDFXML());

        assertModelNotEmpty();
        assertStatementsSize(null, null, null, 28);
View Full Code Here

TOP

Related Classes of org.apache.any23.source.DocumentSource

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.