Package de.jungblut.crawl

Examples of de.jungblut.crawl.FetchResult


      InputStream connection = getConnection(realUrl);
      String html = consumeStream(connection);

      final HashSet<String> set = extractOutlinks(html, realUrl);

      return new FetchResult(realUrl, set);
    } catch (ParserException pEx) {
      // ignore parser exceptions, they contain mostly garbage
    } catch (RuntimeException rEx) {
      rEx.printStackTrace();
    } catch (Exception e) {
View Full Code Here


  }

  @Test
  public void testExtraction() throws Exception {
    OutlinkExtractor mock = new OutlinkExtractor();
    FetchResult extract = mock.extract(HOME2);
    assertEquals(HOME2, extract.getUrl());
    assertEquals(4, extract.getOutlinks().size());
    TreeSet<String> sorted = new TreeSet<>(extract.getOutlinks());
    Iterator<String> it = sorted.iterator();
    assertEquals("http://people.apache.org/local.html", it.next());
    assertEquals("http://people.apache.org/~tjungblut/downloads/local.html",
        it.next());
    // that is the correct html expansion
View Full Code Here

TOP

Related Classes of de.jungblut.crawl.FetchResult

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.