Examples of fullSequentialParse()


Examples of net.htmlparser.jericho.Source.fullSequentialParse()

  public static String extractTagMatching(String html, TagOccurrence toGet) {
    log.debug("looking for {} in tags: {}", toGet.getMatching(), toGet.getTag());
    String found = null;
    Source source = new Source(html);
    source.fullSequentialParse();
    log.debug("source = {}", source);
    List<Element> elements = source.getAllElements(HTMLElementName.TABLE);
    for (Element element : elements) {
      log.debug("this element = {}", element);
      String elementText = element.getTextExtractor().toString();
View Full Code Here

Examples of net.htmlparser.jericho.Source.fullSequentialParse()

  }

  public static String extractSessionId(URL url, String sessionIDName) throws IOException {
    String sessionID = null;
    Source source = new Source(url);
    source.fullSequentialParse();
    List<Element> links = source.getAllElements(HTMLElementName.A);
    for (Element link : links) {
      // log.info("link: {}", link.toString());
      String href = link.getAttributeValue("href");
      if (href != null && href.contains(sessionIDName)) {
View Full Code Here

Examples of net.htmlparser.jericho.Source.fullSequentialParse()

      throw new IllegalStateException("Manipulator " + this.getClass().getName() + " returned null.");
    }
    if (type == OperationType.Manipulator) {
      log.debug("reassigning source..");
      Source newSource = new Source(result);
      newSource.fullSequentialParse();
      this.source = newSource;
    }
    if (successor != null) {
      successor.execute(this.source);
    }
View Full Code Here

Examples of net.htmlparser.jericho.Source.fullSequentialParse()

  private String extractTagMatching(String html, TagOccurrence toGet) {
    log.debug("looking for {} in tags: {}", toGet.getMatching(), toGet.getTag());
    String found = null;
    Source source = new Source(html);
    source.fullSequentialParse();
    List<Element> elements = source.getAllElements(HTMLElementName.TABLE);
    for (Element element : elements) {
      String elementText = element.getTextExtractor().toString();
      if (elementText.contains(toGet.getMatching())) {
        found = element.toString();
View Full Code Here

Examples of net.htmlparser.jericho.Source.fullSequentialParse()

      source = new Source(this.url);
    } catch (FileNotFoundException e) {
      log.info("Error while sourcing URL = {}, error description = {}", this.url, e.toString());
      return new ArrayList<Field>();
    }
    source.fullSequentialParse();

    List<Element> tables = source.getAllElements(HTMLElementName.TABLE);

    for (Element table : tables) {
      extractedFields.addAll(extractFieldsFromTable(table.toString()));
View Full Code Here

Examples of net.htmlparser.jericho.Source.fullSequentialParse()

  private List<Field> extractFieldsFromTable(String html) {
    // log.debug("extracting fields from table: {}", html);
    List<Field> extractedFields = new ArrayList<Field>();
    Source source = new Source(html);
    source.fullSequentialParse();
    List<Element> cells = source.getAllElements(HTMLElementName.TD);
    int rows = source.getAllElements(HTMLElementName.TR).size();
    log.debug("found {} cells in {} rows", cells.size(), rows);
    if (cells.size() == (rows * 2)) {
      Field lastField = null;
View Full Code Here

Examples of net.htmlparser.jericho.Source.fullSequentialParse()

  }

  private List<Field> extractFieldsFromDL(String html) {
    List<Field> extractedFields = new ArrayList<Field>();
    Source source = new Source(html);
    source.fullSequentialParse();
    List<Element> labels = source.getAllElements(HTMLElementName.DT);
    List<Element> values = source.getAllElements(HTMLElementName.DD);
    int cellCount = Math.min(labels.size(), values.size());
    for (int i = 0; i < cellCount; i++) {
      String label = labels.get(i).getTextExtractor().toString().trim().replaceAll(":$", "");
View Full Code Here

Examples of net.htmlparser.jericho.Source.fullSequentialParse()

  private List<Field> extractFieldsFromTable(String html) {
    log.debug("extracting fields from table: {}", html);
    List<Field> extractedFields = new ArrayList<Field>();
    Source source = new Source(html);
    source.fullSequentialParse();
    int cellCount = source.getAllElements(HTMLElementName.TD).size();
    int rowCount = source.getAllElements(HTMLElementName.TR).size();
    log.debug("found {} cells in {} rows", cellCount, rowCount);
    if (cellCount == (rowCount * 2)) {
      Field lastField = null;
View Full Code Here

Examples of net.htmlparser.jericho.Source.fullSequentialParse()

  }

  private List<Field> extractFieldsFromDL(String html) {
    List<Field> extractedFields = new ArrayList<Field>();
    Source source = new Source(html);
    source.fullSequentialParse();
    List<Element> labels = source.getAllElements(HTMLElementName.DT);
    List<Element> values = source.getAllElements(HTMLElementName.DD);
    int cellCount = Math.min(labels.size(), values.size());
    for (int i = 0; i < cellCount; i++) {
      String label = labels.get(i).getTextExtractor().toString().trim().replaceAll(":$", "");
View Full Code Here

Examples of net.htmlparser.jericho.Source.fullSequentialParse()

  }

  private List<Field> extractFieldsFromUL(String html) {
    List<Field> extractedFields = new ArrayList<Field>();
    Source source = new Source(html);
    source.fullSequentialParse();
    List<Element> lis = source.getAllElements(HTMLElementName.LI);
    for (Element li : lis) {
      log.debug("looking at li: {} w/text: {}", li, li.getTextExtractor().toString());
      String[] parts = li.getTextExtractor().toString().split(":");
      if (parts.length == 2) {
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.