Package org.jsoup.nodes

Examples of org.jsoup.nodes.Document


        {
            URI baseURI = new URI( baseurl );
            // to make debugging easier, start with a string. This is assuming UTF-8, which might not be a safe
            // assumption.
            String content = IOUtils.toString( stream, "utf-8" );
            Document doc = Jsoup.parse( content, baseurl );
            Elements links = doc.getElementsByTag( "a" );
            Set<String> results = new HashSet<String>();
            for ( int lx = 0; lx < links.size(); lx++ )
            {
                Element link = links.get( lx );
                /*
 
View Full Code Here


* @author Martin Kersten<Martin.Kersten.mk@gmail.com>
*/
public class JsoupUtilTest {
  @Test
  public void testFindFirstByTagSingleTag() {
    Document document = Jsoup.parse("<html><body><a href=\"A\">A</a><a href=\"B\">B</a></body></html>");
    assertNotNull(JsoupDataUtil.findFirstByTag(document, "a"));
    assertNotNull(JsoupDataUtil.findFirstByTag(document, "body"));
    assertNotNull(JsoupDataUtil.findFirstByTag(document, "body/a"));
    assertNotNull(JsoupDataUtil.findFirstByTag(document, "html/body/a"));
    assertNotNull(JsoupDataUtil.findFirstByTag(document, "html/a"));
View Full Code Here

    return (List<? extends HtmlElement>) super.findAll(csss);
  }
 
  @Override
  protected void loadPage() throws Exception {
    Document jsoup = Jsoup.parse(getInputStream(), getContentEncoding(response), getUri());
    setBaseUri(jsoup.head().baseUri());
    this.htmlElements = new HtmlElements(this, jsoup);
  }
View Full Code Here

          reporter.incrCounter(this._counterGroup, "Skipped - HTML Too Long", 1);
          return;
        }

        // Count all 'itemtype' attributes referencing 'schema.org'
        Document doc = value.getParsedHTML();

        if (doc == null) {
          reporter.incrCounter(this._counterGroup, "Skipped - Unable to Parse HTML", 1);
          return;
        }

        Elements mf = doc.select("[itemtype~=schema.org]");

        if (mf.size() > 0) {
          for (Element e : mf) {
            if (e.hasAttr("itemtype")) {
              output.collect(new Text(e.attr("itemtype").toLowerCase().trim()), new LongWritable(1));
View Full Code Here

        this.confluence = new Confluence(client);
    }

    public String loadResourceAsText(String resourcePath) {
        Page page = confluence.loadPage(resourcePath, false);
        Document doc = Jsoup.parse(page.getBody());
        StringBuilder builder = new StringBuilder();
        addTitle(doc, builder);
        addPanels(doc, builder);
        addExamples(doc, builder);
        return builder.toString();
View Full Code Here

   * @param input the html document
   * @return the processed html document
   */
  public String process(String input) {

    Document doc = Jsoup.parse(input);
   
    extractStyles(doc);
    applyStyles(doc);
    inlineImages(doc);

    doc.outputSettings(doc.outputSettings().prettyPrint(false).escapeMode(Entities.EscapeMode.xhtml));
    String output = doc.outerHtml();
    return output;
  }
View Full Code Here

          if (url.contains("playlist?")) {
            // fsyprint("Fetching %s...", url);
            String last = "";
            final String out = Constants.DATA_PATH + "playlist/"
                + playListName + ".plist";
            final Document doc = Jsoup.connect(url).get();
            final Elements links = doc.select("a[href]");
            final File playListOut = new File(out);
            final FileOutputStream fos = new FileOutputStream(
                playListOut);
            final BufferedWriter bw = new BufferedWriter(
                new OutputStreamWriter(fos));
View Full Code Here

    public JsonObject extractTweet(String html)
  throws java.net.MalformedURLException, java.io.UnsupportedEncodingException {
  JsonObject status = new JsonObject();

  Document doc = Jsoup.parse(html);
  Element tweet_div = doc.select("div.permalink-tweet").first();

  String tweet_text = tweet_div.select("p.tweet-text").first().text();
  status.addProperty("text", tweet_text);

  String tweet_id = tweet_div.attr("data-tweet-id");
  status.addProperty("id_str", tweet_id);
  status.addProperty("id", Long.parseLong(tweet_id));

  String timestamp = doc.select("span.js-short-timestamp").first().attr("data-time");
  Date created_at = new Date();
  created_at.setTime(Long.parseLong(timestamp) * 1000);
  status.addProperty("created_at", date_fmt.format(created_at));

  Elements js_stats_retweets = doc.select("li.js-stat-retweets");
  if (!js_stats_retweets.isEmpty()) {
      status.addProperty("retweeted", true);
      String count = js_stats_retweets.select("strong").first().text();
      status.addProperty("retweet_count", Long.parseLong(count));
  } else {
      status.addProperty("retweeted", false);
      status.addProperty("retweet_count", 0);
  }
  Elements js_stats_favs = doc.select("li.js-stat-favorites");
  status.addProperty("favorited", !js_stats_favs.isEmpty());
     

  // User subfield
  JsonObject user = new JsonObject();
  String user_id = tweet_div.attr("data-user-id");
  user.addProperty("id_str", user_id);
  user.addProperty("id", Long.parseLong(user_id));
  String screen_name = tweet_div.attr("data-screen-name");
  user.addProperty("screen_name", screen_name);
  String user_name = tweet_div.attr("data-name");
  user.addProperty("name", user_name);
 
  status.add("user", user);
 
  // Geo information
  Elements tweet_loc = doc.select("a.tweet-geo-text");
  if (!tweet_loc.isEmpty()) {
      JsonObject location = new JsonObject();
      Element loc = tweet_loc.first();
      // Adding http to avoid malformed URL exception
      URL url = new URL("http:" + loc.attr("href"));
View Full Code Here

        jsonReports.add(new File(ReportBuilderTest.class.getClassLoader().getResource("net/masterthought/cucumber/project3.json").toURI()).getAbsolutePath());
        ReportBuilder reportBuilder = new ReportBuilder(jsonReports, rd, "", "1", "cucumber-reporting", false, false, true, true, false, "", false);
        reportBuilder.generateReports();

        File input = new File(rd, "feature-overview.html");
        Document doc = Jsoup.parse(input, "UTF-8", "");
        assertThat(fromId("overview-title", doc).text(), is("Feature Overview for Build: 1"));
        assertStatsHeader(doc);
        assertStatsFirstFeature(doc);
        assertNotNull(fromId("flash-charts", doc));
    }
View Full Code Here

        jsonReports.add(new File(ReportBuilderTest.class.getClassLoader().getResource("net/masterthought/cucumber/project3.json").toURI()).getAbsolutePath());
        ReportBuilder reportBuilder = new ReportBuilder(jsonReports, rd, "", "1", "cucumber-reporting", false, false, false, true, false, "", false);
        reportBuilder.generateReports();

        File input = new File(rd, "feature-overview.html");
        Document doc = Jsoup.parse(input, "UTF-8", "");
        assertThat(fromId("overview-title", doc).text(), is("Feature Overview for Build: 1"));
        assertStatsHeader(doc);
        assertStatsFirstFeature(doc);
        assertStatsTotals(doc);
        assertNotNull(fromId("js-charts", doc));
View Full Code Here

TOP

Related Classes of org.jsoup.nodes.Document

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.