Examples of ProtocolFactory


Examples of org.apache.droids.helper.factories.ProtocolFactory

  }
 
  public static Droid<Link> createSimpleExceptionCrawlingDroid(
      String targetURI) {
    ParserFactory parserFactory = createDefaultParserFactory();
    ProtocolFactory protocolFactory = createDefaultProtocolFactory();
    URLFiltersFactory filtersFactory = createDefaultURLFiltersFactory();

    SimpleDelayTimer simpleDelayTimer = new SimpleDelayTimer();
    simpleDelayTimer.setDelayMillis(100);
   
View Full Code Here

Examples of org.apache.droids.helper.factories.ProtocolFactory

    htmlParser.getElements().put("script", "src");
    parserFactory.setMap(new HashMap<String, Object>());
    parserFactory.getMap().put("text/html", htmlParser);

    // Create protocol factory. Support HTTP/S only.
    ProtocolFactory protocolFactory = new ProtocolFactory();
   
    // Create and configure HTTP client
    HttpParams params = new BasicHttpParams();
    HttpProtocolParamBean hppb = new HttpProtocolParamBean(params);
    HttpConnectionParamBean hcpb = new HttpConnectionParamBean(params);
    ConnManagerParamBean cmpb = new ConnManagerParamBean(params);
   
    // Set protocol parameters
    hppb.setVersion(HttpVersion.HTTP_1_1);
    hppb.setContentCharset(HTTP.ISO_8859_1);
    hppb.setUseExpectContinue(true);
    // Set connection parameters
    hcpb.setStaleCheckingEnabled(false);
    // Set connection manager parameters
    ConnPerRouteBean connPerRouteBean = new ConnPerRouteBean();
    connPerRouteBean.setDefaultMaxPerRoute(2);
    cmpb.setConnectionsPerRoute(connPerRouteBean);
   
    DroidsHttpClient httpclient = new DroidsHttpClient(params);
   
    HttpProtocol httpProtocol = new HttpProtocol(httpclient);
    protocolFactory.setMap(new HashMap<String, Object>());
    protocolFactory.getMap().put("http", httpProtocol);
    protocolFactory.getMap().put("https", httpProtocol);
   
    // Create URL filter factory.
    URLFiltersFactory filtersFactory = new URLFiltersFactory();
    RegexURLFilter defaultURLFilter = new RegexURLFilter();
    defaultURLFilter.setFile("classpath:/regex-urlfilter.txt");
View Full Code Here

Examples of org.apache.droids.helper.factories.ProtocolFactory

  }
 
  private final CrawlingDroid createDroid(final Queue<Link> queue) {
    final CrawlingDroid droid = new SysoutCrawlingDroid(queue, null);
   
    final ProtocolFactory protocolFactory = DroidsFactory.createDefaultProtocolFactory();
    droid.setProtocolFactory(protocolFactory);
   
    final ParserFactory parserFactory = parserSetup();
    droid.setParserFactory(parserFactory);
    return droid;
View Full Code Here

Examples of org.apache.geronimo.network.protocol.ProtocolFactory

        CallbackSocketProtocol spt = new CallbackSocketProtocol();
        // TODO configurable.
        spt.setTimeout(1000);
        spt.setSelectorManager(selectorManager);

        ProtocolFactory pf = new ProtocolFactory();
        pf.setClockPool(clockPool);
        // TODO configurable.
        pf.setMaxAge(Long.MAX_VALUE);
        pf.setMaxInactivity(1 * 60 * 60 * 1000);
        pf.setReclaimPeriod(500);
        pf.setTemplate(spt);
        pf.setAcceptedCallBack(this);

        serverSocketAcceptor.setAcceptorListener(pf);
        serverSocketAcceptor.setReuseAddress(true);
       
        try {
View Full Code Here

Examples of org.apache.nutch.protocol.ProtocolFactory

    Content content;
    Parse parse;

    Configuration conf = NutchConfiguration.create();
    ParseUtil parser = new ParseUtil(conf);
    ProtocolFactory factory = new ProtocolFactory(conf);
    for (int i = 0; i < sampleFiles.length; i++) {
      urlString = "file:" + sampleDir + fileSeparator + sampleFiles[i];

      protocol = factory.getProtocol(urlString);
      content = protocol.getProtocolOutput(new Text(urlString),
                                           new CrawlDatum()).getContent();
      parse = parser.parseByExtensionId("parse-msexcel", content).get(content.getUrl());

      assertTrue(parse.getText().equals(expectedText));
View Full Code Here

Examples of org.apache.nutch.protocol.ProtocolFactory

    for (int i = 0; i < sampleFiles.length; i++) {
      urlString = "file:" + sampleDir + fileSeparator + sampleFiles[i];

      Configuration conf = NutchConfiguration.create();
      protocol = new ProtocolFactory(conf).getProtocol(urlString);
      content = protocol.getProtocolOutput(new Text(urlString), new CrawlDatum()).getContent();
      parse = new ParseUtil(conf).parseByExtensionId("parse-pdf", content).get(content.getUrl());

      int index = parse.getText().indexOf(expectedText);
      assertTrue(index > 0);
View Full Code Here

Examples of org.apache.nutch.protocol.ProtocolFactory

    Configuration conf = NutchConfiguration.create();
    for (int i = 0; i < sampleFiles.length; i++) {
      urlString = "file:" + sampleDir + fileSeparator + sampleFiles[i];
      urlString = urlString.replace('\\', '/');

      protocol = new ProtocolFactory(conf).getProtocol(urlString);
      content = protocol.getProtocolOutput(new Text(urlString),
          new CrawlDatum()).getContent();

      parseResult = new ParseUtil(conf).parseByExtensionId("feed", content);
View Full Code Here

Examples of org.apache.nutch.protocol.ProtocolFactory

    Configuration conf = NutchConfiguration.create();

    for (int i = 0; i < sampleFiles.length; i++) {
      urlString = "file:" + sampleDir + fileSeparator + sampleFiles[i];

      protocol = new ProtocolFactory(conf).getProtocol(urlString);
      content = protocol.getProtocolOutput(new Text(urlString), new CrawlDatum()).getContent();

      parse = new ParseUtil(conf).parse(content).get(content.getUrl());

      String text = parse.getText().replaceAll("[ \t\r\n]+", " ").trim();
View Full Code Here

Examples of org.apache.nutch.protocol.ProtocolFactory

    Configuration conf = NutchConfiguration.create();
    for (int i = 0; i < sampleFiles.length; i++) {
      urlString = "file:" + sampleDir + fileSeparator + sampleFiles[i];

      protocol = new ProtocolFactory(conf).getProtocol(urlString);
      content = protocol.getProtocolOutput(new Text(urlString), new CrawlDatum()).getContent();
      parse = new ParseUtil(conf).parseByExtensionId("parse-zip",content).get(content.getUrl());
      Assert.assertTrue(parse.getText().equals(expectedText));
    }
  }
View Full Code Here

Examples of org.apache.nutch.protocol.ProtocolFactory

      LOG.info("fetching: " + url);
    }

    IndexingFilters indexers = new IndexingFilters(conf);

    ProtocolFactory factory = new ProtocolFactory(conf);
    Protocol protocol = factory.getProtocol(url);
    CrawlDatum datum = new CrawlDatum();

    ProtocolOutput output = protocol.getProtocolOutput(new Text(url), datum);
   
    IndexWriters writers = new IndexWriters(getConf());
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.