Package com.ontometrics.scraper.extraction

Source Code of com.ontometrics.scraper.extraction.SplicingExtractor

package com.ontometrics.scraper.extraction;

import net.htmlparser.jericho.Element;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.ontometrics.scraper.TagOccurrence;
import com.ontometrics.scraper.util.ScraperUtil;

public class SplicingExtractor extends Manipulator {

  private static final Logger log = LoggerFactory.getLogger(SplicingExtractor.class);

  private SpliceOperation operation = SpliceOperation.After;

  private TagOccurrence tagOccurrence;

  public SplicingExtractor(SpliceOperation operation, TagOccurrence tagOccurrence) {
    setType(OperationType.Manipulator);
    this.operation = operation;
    this.tagOccurrence = tagOccurrence;
  }

  public SpliceOperation getOperation() {
    return operation;
  }

  @Override
  public String performExtraction() {
    String extractedSource = "";
    if (tagOccurrence.getIdentifier() != null) {
      log.debug("about to splice: {}", tagOccurrence);
      Element element = ScraperUtil.extract(getSource(), tagOccurrence);
      if (element != null) {
        extractedSource = element.toString();
      }
      log.debug("spliced out: {}", extractedSource);
    } else {
      extractedSource = ScraperUtil.extract(getSource().toString(), tagOccurrence.getTag(),
          tagOccurrence.getOccurrence());
    }
    return extractedSource;
  }

  @Override
  public void setMatcher(String matcher) {
    this.tagOccurrence.setMatcher(matcher);
  }

}
TOP

Related Classes of com.ontometrics.scraper.extraction.SplicingExtractor

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.