Examples of HTMLExtractor


Examples of com.ontometrics.scraper.extraction.HtmlExtractor

      public URL getBaseUrl() {
        // TODO Auto-generated method stub
        return PagedListingFolder.getUrl();
      }
    };
    HtmlExtractor htmlExtractor = html().url(PagedListingTable.getUrl());

    List<Record> records = new ListingDetailScraper().setConvertURLs(true)
        .iterator(pageIterator)
        .listing(new LinkExtractor().source(htmlExtractor))
        .details(new DefaultFieldExtractor().source(htmlExtractor))
View Full Code Here

Examples of org.apache.jmeter.extractor.HtmlExtractor

    @Override
    public void configure(TestElement el) {
        super.configure(el);
        if (el instanceof HtmlExtractor){
            HtmlExtractor htmlExtractor = (HtmlExtractor) el;
            showScopeSettings(htmlExtractor, true);
            expressionField.setText(htmlExtractor.getExpression());
            attributeField.setText(htmlExtractor.getAttribute());
            defaultField.setText(htmlExtractor.getDefaultValue());
            matchNumberField.setText(htmlExtractor.getMatchNumberAsString());
            refNameField.setText(htmlExtractor.getRefName());
            extractorImplName.setSelectedItem(htmlExtractor.getExtractor());
        }
    }
View Full Code Here

Examples of org.apache.jmeter.extractor.HtmlExtractor

    /**
     * @see org.apache.jmeter.gui.JMeterGUIComponent#createTestElement()
     */
    @Override
    public TestElement createTestElement() {
        AbstractScopedTestElement extractor = new HtmlExtractor();
        modifyTestElement(extractor);
        return extractor;
    }
View Full Code Here

Examples of org.apache.jmeter.extractor.HtmlExtractor

     */
    @Override
    public void modifyTestElement(TestElement extractor) {
        super.configureTestElement(extractor);
        if (extractor instanceof HtmlExtractor) {
            HtmlExtractor htmlExtractor = (HtmlExtractor) extractor;
            saveScopeSettings(htmlExtractor);
            htmlExtractor.setRefName(refNameField.getText());
            htmlExtractor.setExpression(expressionField.getText());
            htmlExtractor.setAttribute(attributeField.getText());
            htmlExtractor.setDefaultValue(defaultField.getText());
            htmlExtractor.setMatchNumber(matchNumberField.getText());
            if(extractorImplName.getSelectedIndex()< HtmlExtractor.getImplementations().length) {
                htmlExtractor.setExtractor(HtmlExtractor.getImplementations()[extractorImplName.getSelectedIndex()]);
            } else {
                htmlExtractor.setExtractor(USE_DEFAULT_EXTRACTOR_IMPL);              
            }

        }
    }
View Full Code Here

Examples of org.apache.stanbol.enhancer.engines.htmlextractor.impl.HtmlExtractor

     * @throws ExtractorException if there is an error during extraction
     * @throws IOException if there is an error when reading the document
     */
    @Test
    public void testMFExtraction() throws Exception {
        HtmlExtractor extractor = new HtmlExtractor(registry, parser);
        MGraph model = new SimpleMGraph();
        String testFile = "test-MF.html";

        // extract text from RDFa annotated html
        InputStream in = getResourceAsStream(testFile);
        assertNotNull("failed to load resource " + testFile, in);

        extractor.extract("file://" + testFile,in,null, "text/html", model);

        // show triples
        int tripleCounter = model.size();
        LOG.debug("Microformat triples: {}",tripleCounter);
        printTriples(model);
View Full Code Here

Examples of org.apache.stanbol.enhancer.engines.htmlextractor.impl.HtmlExtractor

     *
     * @throws Exception
     */
    @Test
    public void testMicrodataExtraction() throws Exception {
      HtmlExtractor extractor = new HtmlExtractor(registry, parser);
      MGraph model = new SimpleMGraph();
      String testFile = "test-microdata.html";

      // extract text from RDFa annotated html
      InputStream in = getResourceAsStream(testFile);
      assertNotNull("failed to load resource " + testFile, in);

      extractor.extract("file://" + testFile,in,null, "text/html", model);

      // show triples
      int tripleCounter = model.size();
      LOG.debug("Microdata triples: {}",tripleCounter);
      printTriples(model);
View Full Code Here

Examples of org.apache.stanbol.enhancer.engines.htmlextractor.impl.HtmlExtractor

     *
     * @throws Exception
     */
    @Test
    public void testRootExtraction() throws Exception {
        HtmlExtractor extractor = new HtmlExtractor(registry, parser);
        MGraph model = new SimpleMGraph();
        String testFile = "test-MultiRoot.html";

        // extract text from RDFa annotated html
        InputStream in = getResourceAsStream(testFile);
        assertNotNull("failed to load resource " + testFile, in);

        extractor.extract("file://" + testFile,in,null, "text/html", model);

        // show triples
        int tripleCounter = model.size();
        LOG.debug("Triples: {}",tripleCounter);
        printTriples(model);
View Full Code Here

Examples of org.apache.stanbol.enhancer.engines.htmlextractor.impl.HtmlExtractor

        return CANNOT_ENHANCE;
    }
   
    @Override
    public void computeEnhancements(ContentItem ci) throws EngineException {
        HtmlExtractor extractor = new HtmlExtractor(htmlExtractorRegistry, htmlParser);
        MGraph model = new SimpleMGraph();
        ci.getLock().readLock().lock();
        try {
            extractor.extract(ci.getUri().getUnicodeString(), ci.getStream(),null, ci.getMimeType(), model);
        } catch (ExtractorException e) {
            throw new EngineException("Error while processing ContentItem "
                    + ci.getUri()+" with HtmlExtractor",e);
        } finally {
            ci.getLock().readLock().unlock();
View Full Code Here

Examples of org.apache.stanbol.enhancer.engines.htmlextractor.impl.HtmlExtractor

     * @throws ExtractorException if there is an error during extraction
     * @throws IOException if there is an error when reading the document
     */
    @Test
    public void testRdfaExtraction() throws Exception {
        HtmlExtractor extractor = new HtmlExtractor(registry, parser);
        MGraph model = new SimpleMGraph();
        String testFile = "test-rdfa.html";
        // extract text from RDFa annotated html
        InputStream in = getResourceAsStream(testFile);
        assertNotNull("failed to load resource " + testFile, in);

        extractor.extract("file://" + testFile,in,null, "text/html", model);

        // show triples
        int tripleCounter = model.size();
        LOG.debug("RDFa triples: {}",tripleCounter);
        printTriples(model);
View Full Code Here

Examples of org.vietspider.html.path2.HTMLExtractor

        tree.removeAll();
        if(document != null) handler.createTreeItem(tree, document);
        new AutoSelectDataNode2(document, url, handler, tree);
       
        if(paths.length < 1) {
          HTMLExtractor extractor  = new HTMLExtractor();
          NodePathParser pathParser = new NodePathParser();
          if(hyperlinkUtil == null) hyperlinkUtil = new HyperLinkUtil();
          HTMLNode header = null;
          HTMLNode body = null;
          try {
            NodePath nodePath  = pathParser.toPath("HEAD");
            header = extractor.lookNode(document.getRoot(), nodePath);
            nodePath  = pathParser.toPath("BODY");
            body = extractor.lookNode(document.getRoot(), nodePath);
          } catch (Exception e) {
            ClientLog.getInstance().setException(getShell(), e);
          }
         
          if(header == null || body == null) return;
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.