Examples of DocumentSource


Examples of com.google.gdata.data.gtt.DocumentSource

    entry.setTitle(new PlainTextConstruct(title));

    if (parser.containsKey("weburl")) {
      String url = parser.getValue("weburl");
      System.out.println("... with html contents from " + url);
      DocumentSource docSource = new DocumentSource(DocumentSource.Type.HTML,
          url);
      entry.setDocumentSource(docSource);
    } else if (parser.containsKey("wikipediaurl")) {
      String url = parser.getValue("wikipediaurl");
      System.out.println("... with mediawiki contents from " + url);
      DocumentSource docSource = new DocumentSource(DocumentSource.Type.WIKI,
          url);
      entry.setDocumentSource(docSource);
    } else if (parser.containsKey("knolurl")) {
      String url = parser.getValue("knolurl");
      System.out.println("... with knol contents from " + url);
      DocumentSource docSource = new DocumentSource(DocumentSource.Type.KNOL,
          url);
      entry.setDocumentSource(docSource);
    } else if (parser.containsKey("file")) {
      String filename = parser.getValue("file");
      System.out.println("... with contents from file at " + filename);
View Full Code Here

Examples of org.apache.any23.source.DocumentSource

        /*2*/ final String content = "@prefix foo: <http://example.org/ns#> .   " +
                                     "@prefix : <http://other.example.org/ns#> ." +
                                     "foo:bar foo: : .                          " +
                                     ":bar : foo:bar .                           ";
        //    The second argument of StringDocumentSource() must be a valid URI.
        /*3*/ DocumentSource source = new StringDocumentSource(content, "http://host.com/service");
        /*4*/ ByteArrayOutputStream out = new ByteArrayOutputStream();
        /*5*/ TripleHandler handler = new NTriplesWriter(out);
              try {
        /*6*/     runner.extract(source, handler);
              } finally {
View Full Code Here

Examples of org.apache.any23.source.DocumentSource

        assumeOnlineAllowed();

        /*1*/ Any23 runner = new Any23();
        /*2*/ runner.setHTTPUserAgent("test-user-agent");
        /*3*/ HTTPClient httpClient = runner.getHTTPClient();
        /*4*/ DocumentSource source = new HTTPDocumentSource(
                 httpClient,
                 "http://dbpedia.org/resource/Trento"
              );
        /*5*/ ByteArrayOutputStream out = new ByteArrayOutputStream();
        /*6*/ TripleHandler handler = new NTriplesWriter(out);
View Full Code Here

Examples of org.apache.any23.source.DocumentSource

        ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
        TripleHandler handler = new RDFXMLWriter(byteArrayOutputStream);
        TripleHandler rdfWriter = new IgnoreAccidentalRDFa(handler);
        ReportingTripleHandler reporting = new ReportingTripleHandler(rdfWriter);

        DocumentSource source = getDocumentSourceFromResource(
                    "/html/rdfa/ansa_2010-02-26_12645863.html",
                    "http://host.com/service");

        Assert.assertTrue( any23.extract(source, reporting).hasMatchingExtractors() );
        try {
View Full Code Here

Examples of org.apache.any23.source.DocumentSource

        assumeOnlineAllowed();

        Any23 runner = new Any23();
        runner.setHTTPUserAgent("test-user-agent");
        HTTPClient httpClient = runner.getHTTPClient();
        DocumentSource source = new HTTPDocumentSource(
                httpClient,
                "http://products.semweb.bestbuy.com/y/products/7590289/"
        );
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        TripleHandler handler = new NTriplesWriter(out);
View Full Code Here

Examples of org.apache.any23.source.DocumentSource

    @Test
    public void testExtractionParameters() throws IOException, ExtractionException, TripleHandlerException {
        final int EXPECTED_TRIPLES  = 6;
        Any23 runner = new Any23();
        DocumentSource source = getDocumentSourceFromResource(
                "/org/apache/any23/validator/missing-og-namespace.html",
                "http://www.test.com"
        );

        ByteArrayOutputStream baos = new ByteArrayOutputStream();
View Full Code Here

Examples of org.apache.any23.source.DocumentSource

    @Test
    public void testExtractionParametersWithNestingDisabled()
    throws IOException, ExtractionException, TripleHandlerException {
        final int EXPECTED_TRIPLES = 19;
        Any23 runner = new Any23();
        DocumentSource source = getDocumentSourceFromResource(
                "/microformats/nested-microformats-a1.html",
                "http://www.test.com"
        );

        ByteArrayOutputStream baos = new ByteArrayOutputStream();
View Full Code Here

Examples of org.apache.any23.source.DocumentSource

    }

    @Test
    public void testExceptionPropagation() throws IOException {
        Any23 any23 = new Any23();
        DocumentSource source = getDocumentSourceFromResource(
                "/application/turtle/geolinkeddata.ttl",
                "http://www.test.com"
        );
        CountingTripleHandler cth1 = new CountingTripleHandler();
        try {
View Full Code Here

Examples of org.apache.any23.source.DocumentSource

    @Test
    public void testXMLMimeTypeManagement() throws IOException, ExtractionException {
        final String documentURI = "http://www.test.com/resource.xml";
        final String contentType = "application/xml";
        final String in = StreamUtils.asString( this.getClass().getResourceAsStream("any23-xml-mimetype.xml") );
        final DocumentSource doc = new StringDocumentSource(in, documentURI, contentType);
        final Any23 any23 = new Any23();
        final CountingTripleHandler cth = new CountingTripleHandler(false);
        final ReportingTripleHandler rth = new ReportingTripleHandler(cth);
        final ExtractionReport report = any23.extract(doc, rth);
        Assert.assertFalse(report.hasMatchingExtractors());
View Full Code Here

Examples of org.apache.any23.source.DocumentSource

    @Test
    public void testAbstractMethodErrorIssue186_1() throws IOException, ExtractionException{
        final Any23 runner = new Any23();
        final String content = FileUtils.readResourceContent("/html/rdfa/rdfa-issue186-1.xhtml");
        final DocumentSource source = new StringDocumentSource(content, "http://base.com");
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        final TripleHandler handler = new NTriplesWriter(out);
        runner.extract(source, handler);
        String n3 = out.toString("UTF-8");
        logger.debug(n3);
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.