Package org.apache.any23.filter

Examples of org.apache.any23.filter.IgnoreAccidentalRDFa


                return 2000;
            }
        });
        ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
        TripleHandler handler = new NTriplesWriter(byteArrayOutputStream);
        TripleHandler rdfWriter = new IgnoreAccidentalRDFa(handler);
        ReportingTripleHandler reporting = new ReportingTripleHandler(rdfWriter);

        DocumentSource source = getDocumentSourceFromResource(
                "/html/rdfa/ansa_2010-02-26_12645863.html",
                "http://host.com/service");
View Full Code Here


     */
    private ExtractionReport detectAndExtract(String in) throws Exception {
        Any23 any23 = new Any23();
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        ReportingTripleHandler outputHandler = new ReportingTripleHandler(
                new IgnoreAccidentalRDFa(new IgnoreTitlesOfEmptyDocuments(
                        new NTriplesWriter(out))));
        return any23.extract(in, "http://host.com/path", outputHandler);
    }
View Full Code Here

            benchmarkTripleHandler = new BenchmarkTripleHandler(tripleHandler);
            tripleHandler = benchmarkTripleHandler;
        }

        if (noTrivial) {
            tripleHandler = new IgnoreAccidentalRDFa(new IgnoreTitlesOfEmptyDocuments(tripleHandler),
                                                     true    // suppress stylesheet triples.
                                                     );
        }

        reportingTripleHandler = new ReportingTripleHandler(tripleHandler);
View Full Code Here

        tripleHandlers.add(new IgnoreAccidentalRDFa(fw));
        tripleHandlers.add(new CountingTripleHandler());
        rdfWriter = new CompositeTripleHandler(tripleHandlers);
        reporter = new ReportingTripleHandler(rdfWriter);
        rdfWriter = new IgnoreAccidentalRDFa(
            new IgnoreTitlesOfEmptyDocuments(reporter),
            true    // suppress stylesheet triples.
        );
        return true;
    }
View Full Code Here

    private ExtractionReport detectAndExtract(String in) throws Exception {
        Any23 any23 = new Any23();
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        ReportingTripleHandler outputHandler = new ReportingTripleHandler(
                new IgnoreAccidentalRDFa(
                        new IgnoreTitlesOfEmptyDocuments(
                                new NTriplesWriter(out)
                        )
                )
        );
        return any23.extract(in, "http://host.com/path", outputHandler);
View Full Code Here

     */
    @Test
    public void testProgrammaticExtraction() throws ExtractionException, IOException, URISyntaxException {
        Any23 any23 = new Any23();
        any23.setHTTPUserAgent("Any23-Servlet");
        any23.setHTTPClient(new DefaultHTTPClient() {
            @Override
            protected int getConnectionTimeout() {
                return 5000;
            }

View Full Code Here

    public void testDemoCodeSnippet2() throws Exception{
        assumeOnlineAllowed();

        /*1*/ Any23 runner = new Any23();
        /*2*/ runner.setHTTPUserAgent("test-user-agent");
        /*3*/ HTTPClient httpClient = runner.getHTTPClient();
        /*4*/ DocumentSource source = new HTTPDocumentSource(
                 httpClient,
                 "http://dbpedia.org/resource/Trento"
              );
        /*5*/ ByteArrayOutputStream out = new ByteArrayOutputStream();
View Full Code Here

    public void testGZippedContent() throws IOException, URISyntaxException, ExtractionException {
        assumeOnlineAllowed();

        Any23 runner = new Any23();
        runner.setHTTPUserAgent("test-user-agent");
        HTTPClient httpClient = runner.getHTTPClient();
        DocumentSource source = new HTTPDocumentSource(
                httpClient,
                "http://products.semweb.bestbuy.com/y/products/7590289/"
        );
        ByteArrayOutputStream out = new ByteArrayOutputStream();
View Full Code Here

                configuration,
                new HTMLFixture(copyResourceToTempFile(file)).getOpener("http://nested.test.com"),
                extractorGroup,
                cth
        );
        instance.setMIMETypeDetector( new TikaMIMETypeDetector(new WhiteSpacesPurifier()) );
        return instance;
    }
View Full Code Here

                configuration,
                new HTMLFixture(copyResourceToTempFile(file)).getOpener("http://nested.test.com"),
                extractorGroup,
                cth
        );
        instance.setMIMETypeDetector( new TikaMIMETypeDetector(new WhiteSpacesPurifier()) );
        return instance;
    }
View Full Code Here

TOP

Related Classes of org.apache.any23.filter.IgnoreAccidentalRDFa

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by Oracle Inc. Contact coftware#gmail.com.