Examples of ProtocolFactory


Examples of org.apache.nutch.protocol.ProtocolFactory

    Content content;
    Parse parse;

    Configuration conf = NutchConfiguration.create();
    urlString = "file:" + sampleDir + fileSeparator + id3v2;
    protocol = new ProtocolFactory(conf).getProtocol(urlString);
    content = protocol.getProtocolOutput(new Text(urlString), new CrawlDatum())
                      .getContent();
    parse = new ParseUtil(conf).parseByExtensionId("parse-mp3", content);
    Metadata metadata = parse.getData().getParseMeta();
    assertEquals("postgresql comment id3v2", metadata.get("COMM-Text"));
View Full Code Here

Examples of org.apache.nutch.protocol.ProtocolFactory

    Content content;
    Parse parse;

    Configuration conf = NutchConfiguration.create();
    urlString = "file:" + sampleDir + fileSeparator + id3v1;
    protocol = new ProtocolFactory(conf).getProtocol(urlString);
    content = protocol.getProtocolOutput(new Text(urlString), new CrawlDatum())
                      .getContent();
    parse = new ParseUtil(conf).parseByExtensionId("parse-mp3", content);

    Metadata metadata = parse.getData().getParseMeta();
View Full Code Here

Examples of org.apache.nutch.protocol.ProtocolFactory

    Content content;
    Parse parse;

    Configuration conf = NutchConfiguration.create();
    urlString = "file:" + sampleDir + fileSeparator + none;
    protocol = new ProtocolFactory(conf).getProtocol(urlString);
    content = protocol.getProtocolOutput(new Text(urlString), new CrawlDatum())
                      .getContent();
    parse = new ParseUtil(conf).parseByExtensionId("parse-mp3", content);
//    Metadata metadata = parse.getData().getParseMeta();
    if (parse.getData().getStatus().isSuccess()) {
View Full Code Here

Examples of org.apache.nutch.protocol.ProtocolFactory

   * @return contents of url
   */
  private static String getUrlContent(String url, Configuration conf) {
    Protocol protocol;
    try {
      protocol = new ProtocolFactory(conf).getProtocol(url);
      Content content = protocol.getProtocolOutput(new Text(url), new CrawlDatum()).getContent();
      Parse parse = new ParseUtil(conf).parse(content);
      System.out.println("text:" + parse.getText());
      return parse.getText();

View Full Code Here

Examples of org.apache.nutch.protocol.ProtocolFactory

    //LOG.setLevel(Level.FINE);
    String url = args[0];
    Configuration conf = NutchConfiguration.create();
    RSSParser parser = new RSSParser();
    parser.setConf(conf);
    Protocol protocol = new ProtocolFactory(conf).getProtocol(url);
    Content content = protocol.getProtocolOutput(new Text(url), new CrawlDatum()).getContent();
    Parse parse = parser.getParse(content).get(content.getUrl());
    System.out.println("data: "+ parse.getData());
    System.out.println("text: "+parse.getText());
  }
View Full Code Here

Examples of org.apache.nutch.protocol.ProtocolFactory

    Content content;
    Parse parse;

    Configuration conf = NutchConfiguration.create();
    urlString = "file:" + sampleDir + fileSeparator + none;
    protocol = new ProtocolFactory(conf).getProtocol(urlString);
    content = protocol.getProtocolOutput(new Text(urlString), new CrawlDatum())
                      .getContent();
    parse = new ParseUtil(conf).parseByExtensionId("parse-mp3", content).get(urlString);
//    Metadata metadata = parse.getData().getParseMeta();
    if (parse.getData().getStatus().isSuccess()) {
View Full Code Here

Examples of org.apache.nutch.protocol.ProtocolFactory

    Content content;
    Parse parse;

    Configuration conf = NutchConfiguration.create();
    urlString = "file:" + sampleDir + fileSeparator + id3v2;
    protocol = new ProtocolFactory(conf).getProtocol(urlString);
    content = protocol.getProtocolOutput(new Text(urlString), new CrawlDatum())
                      .getContent();
    parse = new ParseUtil(conf).parseByExtensionId("parse-mp3", content).get(urlString);
    Metadata metadata = parse.getData().getParseMeta();
    assertEquals("postgresql comment id3v2", metadata.get("COMM-Text"));
View Full Code Here

Examples of org.apache.nutch.protocol.ProtocolFactory

    Content content;
    Parse parse;

    Configuration conf = NutchConfiguration.create();
    urlString = "file:" + sampleDir + fileSeparator + id3v1;
    protocol = new ProtocolFactory(conf).getProtocol(urlString);
    content = protocol.getProtocolOutput(new Text(urlString), new CrawlDatum())
                      .getContent();
    parse = new ParseUtil(conf).parseByExtensionId("parse-mp3", content).get(urlString);

    Metadata metadata = parse.getData().getParseMeta();
View Full Code Here

Examples of org.apache.nutch.protocol.ProtocolFactory

    for (int i = 0; i < sampleFiles.length; i++) {
      urlString = "file:" + sampleDir + fileSeparator + sampleFiles[i];

      Configuration conf = NutchConfiguration.create();
      protocol = new ProtocolFactory(conf).getProtocol(urlString);
      content = protocol.getProtocolOutput(new Text(urlString), new CrawlDatum()).getContent();
      parse = new ParseUtil(conf).parseByExtensionId("parse-tika", content).get(content.getUrl());

      assertEquals("121", parse.getData().getMeta("width"));
      assertEquals("48", parse.getData().getMeta("height"));
View Full Code Here

Examples of org.apache.nutch.protocol.ProtocolFactory

  protected void tearDown() {}

  public String getTextContent(String fileName) throws ProtocolException, ParseException {
    String urlString = "file:" + sampleDir + fileSeparator + fileName;
    Protocol protocol = new ProtocolFactory(conf).getProtocol(urlString);
    Content content = protocol.getProtocolOutput(new Text(urlString), new CrawlDatum()).getContent();
    Parse parse = new ParseUtil(conf).parseByExtensionId("parse-msword", content).get(content.getUrl());
    return parse.getText();
  }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.