Examples of MimeUtil


Examples of org.apache.nutch.util.MimeUtil

    public void testIt() throws ProtocolException, ParseException, IOException {

  String urlString;
  Parse parse;
  Configuration conf = NutchConfiguration.create();
  MimeUtil mimeutil = new MimeUtil(conf);

  urlString = "file:" + sampleDir + fileSeparator + rtfFile;

  File file = new File(sampleDir + fileSeparator + rtfFile);
  byte[] bytes = new byte[(int) file.length()];
  DataInputStream in = new DataInputStream(new FileInputStream(file));
  in.readFully(bytes);
  in.close();

  WebPage page = new WebPage();
  page.setBaseUrl(new Utf8(urlString));
  page.setContent(ByteBuffer.wrap(bytes));
  String mtype = mimeutil.getMimeType(file);
  page.setContentType(new Utf8(mtype));

  parse = new ParseUtil(conf).parse(urlString, page);

  String title = parse.getTitle();
View Full Code Here

Examples of org.apache.nutch.util.MimeUtil

    this.maxContent = conf.getInt("http.content.limit", 64 * 1024);
    this.userAgent = getAgentString(conf.get("http.agent.name"), conf.get("http.agent.version"), conf
        .get("http.agent.description"), conf.get("http.agent.url"), conf.get("http.agent.email"));
    this.acceptLanguage = conf.get("http.accept.language", acceptLanguage);
    this.accept = conf.get("http.accept", accept);
    this.mimeTypes = new MimeUtil(conf);
    this.useHttp11 = conf.getBoolean("http.useHttp11", false);
    this.robots.setConf(conf);
    logConf();
  }
View Full Code Here

Examples of org.apache.nutch.util.MimeUtil

  /**
   * Empty implementation: the body contains no statements, so the supplied
   * configuration is neither read nor modified.
   *
   * @param conf configuration object; unused here
   */
  public void addIndexBackendOptions(Configuration conf) {
  }

  /**
   * Stores the given configuration and creates a new {@code MimeUtil} from it.
   *
   * @param conf configuration saved in {@code this.conf} and passed to the
   *             {@code MimeUtil} constructor
   */
  public void setConf(Configuration conf) {
    this.conf = conf;
    // A new MimeUtil is built on every call, from the configuration just stored.
    MIME = new MimeUtil(conf);
  }
View Full Code Here

Examples of org.apache.nutch.util.MimeUtil

    Configuration conf = NutchConfiguration.create();

    WebPage page = new WebPage();
    page.setBaseUrl(new Utf8(url));
    page.setContent(ByteBuffer.wrap(bytes));
    MimeUtil mimeutil = new MimeUtil(conf);
    String mtype = mimeutil.getMimeType(file);
    page.setContentType(new Utf8(mtype));

    new ParseUtil(conf).parse(url, page);

    ByteBuffer bb = page.getFromMetadata(new Utf8("License-Url"));
View Full Code Here

Examples of org.apache.nutch.util.MimeUtil

    // TikaParser parser = new TikaParser();
    // parser.setConf(conf);
    WebPage page = new WebPage();
    page.setBaseUrl(new Utf8(url));
    page.setContent(ByteBuffer.wrap(bytes));
    MimeUtil mimeutil = new MimeUtil(conf);
    String mtype = mimeutil.getMimeType(file);
    page.setContentType(new Utf8(mtype));
    // Parse parse = parser.getParse(url, page);

    Parse parse = new ParseUtil(conf).parse(url, page);
View Full Code Here

Examples of org.apache.nutch.util.MimeUtil

    this.orig = url.toString();
    this.base = url.toString();
    this.file = file;
    this.conf = conf;
   
    MIME = new MimeUtil(conf);
    tika = new Tika();

    if (!"file".equals(url.getProtocol()))
      throw new FileException("Not a file url:" + url);
View Full Code Here

Examples of org.apache.nutch.util.MimeUtil

    this.orig = url.toString();
    this.base = url.toString();
    this.file = file;
    this.conf = conf;
   
    MIME = new MimeUtil(conf);

    if (!"file".equals(url.getProtocol()))
      throw new FileException("Not a file url:" + url);

    if (File.LOG.isTraceEnabled()) {
View Full Code Here

Examples of org.apache.nutch.util.MimeUtil

 
 
  /**
   * Creates a new instance of ZipTextExtractor.
   *
   * <p>Keeps a reference to the supplied configuration and builds the
   * {@code MimeUtil} held in {@code MIME} from that same configuration.
   *
   * @param conf configuration stored in {@code this.conf} and passed to the
   *             {@code MimeUtil} constructor
   */
  public ZipTextExtractor(Configuration conf) {
    this.conf = conf;
    this.MIME = new MimeUtil(conf);
  }
View Full Code Here

Examples of org.apache.nutch.util.MimeUtil

    return doc;
  }

  /**
   * Stores the given configuration and creates a new {@code MimeUtil} from it.
   *
   * @param conf configuration saved in {@code this.conf} and passed to the
   *             {@code MimeUtil} constructor
   */
  public void setConf(Configuration conf) {
    this.conf = conf;
    // A new MimeUtil is built on every call, from the configuration just stored.
    MIME = new MimeUtil(conf);
  }
View Full Code Here

Examples of org.apache.nutch.util.MimeUtil

    this.url = url;
    this.base = base;
    this.content = content;
    this.metadata = metadata;

    this.mimeTypes = new MimeUtil(conf);
    this.contentType = getContentType(contentType, url, content);
  }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.