Package net.htmlparser.jericho

Examples of net.htmlparser.jericho.Element


        Source src = new Source(content);
        OutputDocument out = new OutputDocument(src);
        for (String filteredTagName : filteredTags) {
            for (StartTag startTag : src.getAllStartTags(filteredTagName)) {
                if (startTag.getTagType() == StartTagType.NORMAL) {
                    Element element = startTag.getElement();
                    EndTag endTag = element.getEndTag();
                    if (removeContentBetweenTags && endTag != null) {
                        out.remove(element);
                    } else {
                        out.remove(startTag);
                        if (endTag != null) {
View Full Code Here


        return null;
    }

    private String getContextForArea(Element areaNode, Source source) {
      String ctx = areaNode.toString();
      Element map = areaNode.getParentElement();
      if (map.getName().equals(HTMLElementName.MAP) && map.getAttributeValue("name") != null) {
        StartTag img = source.getFirstStartTag("usemap", "#" + map.getAttributeValue("name"), false);
        if (img != null) {
          ctx = img.getElement().toString();
        }
      }
      return ctx;
View Full Code Here

        if (!isDataTable) {
            return errors;
        }

        // Criteria 5.3
        Element header = null;

        for (Element childElement : node.getChildElements()) {
            if (HTMLElementName.TR.equals(childElement.getName())) {
                header = childElement;
                break;
            } else if (HTMLElementName.TBODY.equals(childElement.getName())) {
                header = childElement.getChildElements().get(0);
                break;
            }
        }

        if (header == null) {
            logger.debug("No table data, returning...");
            return errors;
        }

        // get the first cell
        if (logger.isDebugEnabled()) {
            logger.debug("Header: " + header.getName());
        }

        final Set<String> ids = new HashSet<String>();
        int scopes = 0;
        for (Element element : header.getChildElements()) {
            if (logger.isDebugEnabled()) {
                logger.debug("Header in loop: " + element.getName());
            }

            if (!HTMLElementName.TH.equals(element.getName())) {
View Full Code Here

            throw new AnnotationException("Error reading output from WikiMachine ",e);
        }
        List<Element>KeywordElements=parser.getAllElementsByClass("keywords");

        if (KeywordElements!=null && !KeywordElements.isEmpty()){
            Element keywordElement= KeywordElements.get(0);
            for (Element linkElement : keywordElement.getAllElements()) {
                wikiUrl="";
                surfaceForm="";
                wikiUrl=linkElement.getAttributeValue("href");
                if (wikiUrl!=null)
                    if (wikiUrl.startsWith(wikiPrefix)) {
View Full Code Here

        Source source = new Source(text);
        return createMarkdownInsideHtmlBlockNode(text, 0, source, new SuperNode());
    }

    private Node createMarkdownInsideHtmlBlockNode(String text, int pos, Source source, SuperNode superNode) {
        Element markdownElement = source.getNextElement(pos, "markdown", MARKDOWN_ATTRIBUTE_PATTERN);
        if (markdownElement == null) {
            superNode.getChildren().add(new HtmlBlockNode(text.substring(pos)));
            return superNode;
        }

        superNode.getChildren().add(new HtmlBlockNode(text.substring(pos, markdownElement.getBegin())));

        String indent = getIndent(text, markdownElement);
        Tag endTag = getEndTag(text, source, markdownElement, indent);
        String innerText = getInnerText(text, markdownElement, endTag, indent);
        List<Node> children = parseInnerText(innerText);

        superNode.getChildren().add(new MarkdownInsideHtmlBlockNode(
            markdownElement.getStartTag().toString(),
            children,
            endTag.toString()
        ));

        return createMarkdownInsideHtmlBlockNode(text, endTag.getEnd(), source, superNode);
View Full Code Here

    {
        final ResourceFile rf = resources.get ( 0 );
        final Source xmlSource = new Source ( ReflectUtils.getClassSafely ( rf.getClassName () ).getResource ( rf.getSource () ) );
        xmlSource.fullSequentialParse ();

        final Element baseClassTag = xmlSource.getFirstElement ( SkinInfoConverter.CLASS_NODE );
        final String baseClass = baseClassTag != null ? baseClassTag.getContent ().toString () : null;

        for ( final Element includeTag : xmlSource.getAllElements ( SkinInfoConverter.INCLUDE_NODE ) )
        {
            final String includeClass = includeTag.getAttributeValue ( SkinInfoConverter.NEAR_CLASS_ATTRIBUTE );
            final String finalClass = includeClass != null ? includeClass : baseClass;
View Full Code Here

    source.fullSequentialParse();
    OutputDocument outputDocument = new OutputDocument(source);
    List<Tag> tags = source.getAllTags();
    int pos = 0;
    for (Tag tag : tags) {
      Element tagElement = tag.getElement();
      if (tagElement == null) {
        System.out.println(tag.getName());
      } else {
        StartTag startTag = tagElement.getStartTag();
        Attributes attributes = startTag.getAttributes();
        if (attributes != null) {
          for (Attribute attribute : startTag.getAttributes()) {
            if (uppercase) {
              outputDocument.replace(attribute.getNameSegment(), attribute.getNameSegment().toString()
View Full Code Here

    source.fullSequentialParse();

    String[] parsedOnClosingTag = source.toString().split("</");

    log.info("split on close tag: {} and {}", parsedOnClosingTag[0], parsedOnClosingTag[1]);
    Element liElement = source.getAllElements(HTMLElementName.LI).get(0);
    log.info("li: {}", liElement);
    log.info("li tags: {}", liElement.getAllTags());
    Field field = extractFieldByDetectingTagWrapper(liElement);
    log.info("found field: {}", field);

  }
View Full Code Here

  }

  private void removeInvalidFields(List<Element> fields) {
    java.util.Iterator<Element> iterator = fields.iterator();
    while (iterator.hasNext()) {
      Element field = iterator.next();
      if (!isAField(field.toString())) {
        log.debug("pruning invalid field: {}", field);
        iterator.remove();
      }
    }
  }
View Full Code Here

    log.debug("found {} cells in {} rows", cells.size(), rows);
    if (cells.size() == (rows * 2)) {
      Field lastField = null;
      log.debug("cells.size: {}", cells.size());
      for (int i = 0; i < cells.size(); i++) {
        Element labelElement = cells.get(i);
        Element valueElement = cells.get(++i);
        String label = labelElement.getTextExtractor().toString().trim().replaceAll(":$", "");
        String value = getValueFieldText(valueElement);
        log.debug("found field: {}={}", label, value);
        if (StringUtils.isEmpty(label) && lastField != null) {
          lastField.addValue(value);
View Full Code Here

TOP

Related Classes of net.htmlparser.jericho.Element

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.