Package org.apache.stanbol.enhancer.engines.htmlextractor.impl

Examples of org.apache.stanbol.enhancer.engines.htmlextractor.impl.HtmlExtractor.extract()


        String testFile = "test-rdfa.html";
        // extract text from RDFa annotated html
        InputStream in = getResourceAsStream(testFile);
        assertNotNull("failed to load resource " + testFile, in);

        extractor.extract("file://" + testFile,in,null, "text/html", model);

        // show triples
        int tripleCounter = model.size();
        LOG.debug("RDFa triples: {}",tripleCounter);
        printTriples(model);
View Full Code Here


        // extract triples from Microformat annotated html
        InputStream in = getResourceAsStream(testFile);
        assertNotNull("failed to load resource " + testFile, in);

        extractor.extract("file://" + testFile,in,null, "text/html", model);

        // show triples
        int tripleCounter = model.size();
        LOG.debug("Microformat triples: {}",tripleCounter);
        printTriples(model);
View Full Code Here

      // extract triples from Microdata annotated html
      InputStream in = getResourceAsStream(testFile);
      assertNotNull("failed to load resource " + testFile, in);

      extractor.extract("file://" + testFile,in,null, "text/html", model);

      // show triples
      int tripleCounter = model.size();
      LOG.debug("Microdata triples: {}",tripleCounter);
      printTriples(model);
View Full Code Here

        // extract text from RDFa annotated html
        InputStream in = getResourceAsStream(testFile);
        assertNotNull("failed to load resource " + testFile, in);

        extractor.extract("file://" + testFile,in,null, "text/html", model);

        // show triples
        int tripleCounter = model.size();
        LOG.debug("Triples: {}",tripleCounter);
        printTriples(model);
View Full Code Here

    public void computeEnhancements(ContentItem ci) throws EngineException {
        HtmlExtractor extractor = new HtmlExtractor(htmlExtractorRegistry, htmlParser);
        MGraph model = new SimpleMGraph();
        ci.getLock().readLock().lock();
        try {
            extractor.extract(ci.getUri().getUnicodeString(), ci.getStream(),null, ci.getMimeType(), model);
        } catch (ExtractorException e) {
            throw new EngineException("Error while processing ContentItem "
                    + ci.getUri()+" with HtmlExtractor",e);
        } finally {
            ci.getLock().readLock().unlock();
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by ORACLE Inc. Contact coftware#gmail.com.