Examples of URLCrawlDatum


Examples of org.apache.nutch.crawl.CrawlDBTestUtil.URLCrawlDatum

   */
  @Test
  public void testUrl404Purging() throws Exception {
    // create a CrawlDatum with DB GONE status
    ArrayList<URLCrawlDatum> list = new ArrayList<URLCrawlDatum>();
    list.add(new URLCrawlDatum(new Text("http://www.example.com"), new CrawlDatum(
      CrawlDatum.STATUS_DB_GONE, 0, 0.0f)));
    list.add(new URLCrawlDatum(new Text("http://www.example1.com"), new CrawlDatum(
      CrawlDatum.STATUS_DB_FETCHED, 0, 0.0f)));
    list.add(new URLCrawlDatum(new Text("http://www.example2.com"), new CrawlDatum(
      CrawlDatum.STATUS_DB_UNFETCHED, 0, 0.0f)));
    dbDir = new Path(testdir, "crawldb");
    newCrawlDb = new Path(testdir,"newcrawldb");
    // create crawldb
    CrawlDBTestUtil.createCrawlDb(conf, fs, dbDir, list);
View Full Code Here

Examples of org.apache.nutch.crawl.CrawlDBTestUtil.URLCrawlDatum

      Text key = new Text();
      CrawlDatum value = new CrawlDatum();
      if (!reader.next(key, value)) {
        break READ;
      }
      l.add(new URLCrawlDatum(key, value));
    } while (true);

    reader.close();
    return l;
  }
View Full Code Here

Examples of org.apache.nutch.crawl.CrawlDBTestUtil.URLCrawlDatum

      Text key = new Text();
      CrawlDatum value = new CrawlDatum();
      if (!reader.next(key, value)) {
        break READ;
      }
      l.add(new URLCrawlDatum(key, value));
    } while (true);

    reader.close();
    return l;
  }
View Full Code Here

Examples of org.apache.nutch.crawl.CrawlDBTestUtil.URLCrawlDatum

      Text key = new Text();
      CrawlDatum value = new CrawlDatum();
      if (!reader.next(key, value)) {
        break READ;
      }
      l.add(new URLCrawlDatum(key, value));
    } while (true);

    reader.close();
    return l;
  }
View Full Code Here

Examples of org.apache.nutch.crawl.CrawlDBTestUtil.URLCrawlDatum

      Text key = new Text();
      CrawlDatum value = new CrawlDatum();
      if (!reader.next(key, value)) {
        break READ;
      }
      l.add(new URLCrawlDatum(key, value));
    } while (true);

    reader.close();
    return l;
  }
View Full Code Here

Examples of org.apache.nutch.crawl.CrawlDBTestUtil.URLCrawlDatum

   * @throws Exception
   */
  public void testUrl404Purging() throws Exception {
    // create a CrawlDatum with DB GONE status
    ArrayList<URLCrawlDatum> list = new ArrayList<URLCrawlDatum>();
    list.add(new URLCrawlDatum(new Text("http://www.example.com"), new CrawlDatum(
      CrawlDatum.STATUS_DB_GONE, 0, 0.0f)));
    list.add(new URLCrawlDatum(new Text("http://www.example1.com"), new CrawlDatum(
      CrawlDatum.STATUS_DB_FETCHED, 0, 0.0f)));
    list.add(new URLCrawlDatum(new Text("http://www.example2.com"), new CrawlDatum(
      CrawlDatum.STATUS_DB_UNFETCHED, 0, 0.0f)));
    dbDir = new Path(testdir, "crawldb");
    newCrawlDb = new Path(testdir,"newcrawldb");
    // create crawldb
    CrawlDBTestUtil.createCrawlDb(conf, fs, dbDir, list);
View Full Code Here

Examples of org.apache.nutch.crawl.CrawlDBTestUtil.URLCrawlDatum

      Text key = new Text();
      CrawlDatum value = new CrawlDatum();
      if (!reader.next(key, value)) {
        break READ;
      }
      l.add(new URLCrawlDatum(key, value));
    } while (true);

    reader.close();
    return l;
  }
View Full Code Here

Examples of org.apache.nutch.crawl.CrawlDBTestUtil.URLCrawlDatum

      Text key = new Text();
      CrawlDatum value = new CrawlDatum();
      if (!reader.next(key, value)) {
        break READ;
      }
      l.add(new URLCrawlDatum(key, value));
    } while (true);

    reader.close();
    return l;
  }
View Full Code Here

Examples of org.apache.nutch.crawl.CrawlDBTestUtil.URLCrawlDatum

      Text key = new Text();
      CrawlDatum value = new CrawlDatum();
      if (!reader.next(key, value)) {
        break READ;
      }
      l.add(new URLCrawlDatum(key, value));
    } while (true);

    reader.close();
    return l;
  }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.