Package org.jsoup.select

Examples of org.jsoup.select.Elements


        int status = 0;

        if (doc == null)
            return;
               
        Elements links = doc.select("a");

        if (links == null)
            return;

        int index = 0;
        int returncode = 0;           
           
        for (int i = 0; i < links.toArray().length; i++)
        {
            String link = links.get(i).attr("href").toLowerCase();
            String linkalt = links.get(i).attr("alt");
            String linktitle = links.get(i).attr("title");
            String linktext = links.get(i).text();
            String linkinnerhtml = links.get(i).html();
            String newLink2 = null;
            int wrong = 0;

            if (!link.contains("javascript:") && link != null)
            {
View Full Code Here


     * @throws Exception
     */

   void parseMetatags() throws Exception {
       try {
       Elements meta;
       String comments = "";
      
       meta = doc.getElementsByTag("META");
       Object metatags[] = meta.toArray();

       for (int i = 0; i < metatags.length; i++)
       {
          String metatag = metatags[i].toString().toLowerCase().trim();
          if(metatag.contains("keywords"))
View Full Code Here

            URI baseURI = new URI( baseurl );
            // to make debugging easier, start with a string. This is assuming UTF-8, which might not be a safe
            // assumption.
            String content = IOUtils.toString( stream, "utf-8" );
            Document doc = Jsoup.parse( content, baseurl );
            Elements links = doc.getElementsByTag( "a" );
            Set<String> results = new HashSet<String>();
            for ( int lx = 0; lx < links.size(); lx++ )
            {
                Element link = links.get( lx );
                /*
                 * The abs:href loses directories, so we deal with absolute paths ourselves below in cleanLink
                 */
                String target = link.attr( "href" );
                if ( target != null )
View Full Code Here

        if (doc == null) {
          reporter.incrCounter(this._counterGroup, "Skipped - Unable to Parse HTML", 1);
          return;
        }

        Elements mf = doc.select("[itemtype~=schema.org]");

        if (mf.size() > 0) {
          for (Element e : mf) {
            if (e.hasAttr("itemtype")) {
              output.collect(new Text(e.attr("itemtype").toLowerCase().trim()), new LongWritable(1));
            }
          }
View Full Code Here

        Element element = doc.getElementsByTag("h1").first();
        builder.append(element.text()).append("\n\n");
    }

    protected void addPanels(Document doc, StringBuilder builder) {
        Elements elements = doc.getElementsByTag("ac:structured-macro");
        for (Element element : elements) {
            String name = element.attr("ac:name");
            if (acceptedMacros.contains(name)) {
                appendMacroTitle(builder, element);
                appendMacroBody(builder, element);
View Full Code Here

            }
        }
    }

    private void appendMacroTitle(StringBuilder builder, Element element) {
        Elements parameters = element.getElementsByTag("ac:parameter");
        if (parameters.size() > 0) {
            for (Element parameter : parameters) {
                if ("title".equals(parameter.attr("ac:name"))) {
                    String text = parameter.text();
                    if (!text.contains(":")) {
                        text = text + ":";
View Full Code Here

            }
        }
    }

    private void appendMacroBody(StringBuilder builder, Element element) {
        Elements bodies = element.getElementsByTag("ac:rich-text-body");
        if (!bodies.isEmpty()) {
            Element body = bodies.first();
            cleanNodes(body, "div");
            cleanNodes(body, "p");
            builder.append(body.text().replaceAll("<br/>", "\n")).append("\n");
        }
    }
View Full Code Here

            builder.append(body.text().replaceAll("<br/>", "\n")).append("\n");
        }
    }

    protected void addExamples(Document doc, StringBuilder builder) {
        Elements tables = doc.getElementsByTag("table");
        if (!tables.isEmpty()) {
            builder.append("Examples:\n");
            Element table = tables.first();
            Elements headers = table.select("tr").first().select("th");
            for (Element header : headers) {
                builder.append("|").append(header.text());
            }
            builder.append("|\n");
            Elements data = table.select("tr");
            for (int i = 1; i < data.size(); i++) {
                for (Element cell : data.get(i).select("td")) {
                    builder.append("|").append(cell.text());
                }
                builder.append("|\n");
            }
        }
View Full Code Here

   *
   * @param doc the html document
   * @return a string representing the stylesheet.
   */
  private String fetchStyles(Document doc) {
    Elements els = doc.select(STYLE_TAG);
    StringBuilder styles = new StringBuilder();
    for (Element e : els) {
      if (e.attr("data-inline").equals("true")) {
        styles.append(e.data());
        e.remove();
View Full Code Here

   * Inlines images marked with <code>data-inline="true"</code>
   *
   * @param doc the html document
   */
  private void inlineImages(Document doc) {
    Elements allImages = doc.getElementsByTag(IMG_TAG);
    for (Element img : allImages) {
      if (img.attr("data-inline").equals("true")) {
        String src = img.attr(IMG_SRC_ATTR);
        try {
          URL url = new URL(src);
View Full Code Here

TOP

Related Classes of org.jsoup.select.Elements

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.