Package org.apache.nutch.plugin

Examples of org.apache.nutch.plugin.Extension


      impls = new HashSet(Arrays.asList(names));
    }
    Extension[] extensions = this.extensionPoint.getExtensions();
    HashMap normalizerExtensions = new HashMap();
    for (int i = 0; i < extensions.length; i++) {
      Extension extension = extensions[i];
      if (impls != null && !impls.contains(extension.getClazz()))
        continue;
      normalizerExtensions.put(extension.getClazz(), extension);
    }
    List res = new ArrayList();
    if (orders == null) {
      res.addAll(normalizerExtensions.values());
    } else {
      // first add those explicitly named in correct order
      for (int i = 0; i < orders.length; i++) {
        Extension e = (Extension)normalizerExtensions.get(orders[i]);
        if (e != null) {
          res.add(e);
          normalizerExtensions.remove(orders[i]);
        }
      }
View Full Code Here


    String pluginName = "urlfilter-prefix";
    Extension[] extensions = PluginRepository.get(conf).getExtensionPoint(
        URLFilter.class.getName()).getExtensions();
    for (int i = 0; i < extensions.length; i++) {
      Extension extension = extensions[i];
      if (extension.getDescriptor().getPluginId().equals(pluginName)) {
        attributeFile = extension.getAttribute("file");
        break;
      }
    }
    if (attributeFile != null && attributeFile.trim().equals(""))
      attributeFile = null;
View Full Code Here

      parserFactory = new ParserFactory(conf);
  }
   
  /** Unit test for <code>getExtensions(String)</code> method. */
  public void testGetExtensions() throws Exception {
    Extension ext = (Extension)parserFactory.getExtensions("text/html").get(0);
    assertEquals("parse-html", ext.getDescriptor().getPluginId());
    ext = (Extension) parserFactory.getExtensions("text/html; charset=ISO-8859-1").get(0);
    assertEquals("parse-html", ext.getDescriptor().getPluginId());
    ext = (Extension)parserFactory.getExtensions("foo/bar").get(0);
    assertEquals("parse-text", ext.getDescriptor().getPluginId());
  }
View Full Code Here

        "org.apache.nutch.parse.Parser").getExtensions();

    String contentType, command, timeoutString;

    for (int i = 0; i < extensions.length; i++) {
      Extension extension = extensions[i];

      // only look for extensions defined by plugin parse-ext
      if (!extension.getDescriptor().getPluginId().equals("parse-ext"))
        continue;

      contentType = extension.getAttribute("contentType");
      if (contentType == null || contentType.equals(""))
        continue;

      command = extension.getAttribute("command");
      if (command == null || command.equals(""))
        continue;

      timeoutString = extension.getAttribute("timeout");
      if (timeoutString == null || timeoutString.equals(""))
        timeoutString = "" + TIMEOUT_DEFAULT;

      TYPE_PARAMS_MAP.put(contentType, new String[] { command, timeoutString });
    }
View Full Code Here

   * used. If none match, then the {@link NutchDocumentAnalyzer} is used.
   */
  public NutchAnalyzer get(String lang) {

    NutchAnalyzer analyzer = DEFAULT_ANALYZER;
    Extension extension = getExtension(lang);
    if (extension != null) {
        try {
            analyzer = (NutchAnalyzer) extension.getExtensionInstance();
        } catch (PluginRuntimeException pre) {
            analyzer = DEFAULT_ANALYZER;
        }
    }
    return analyzer;
View Full Code Here

  }

  private Extension getExtension(String lang) {

    if (lang == null) { return null; }
    Extension extension = (Extension) this.conf.getObject(lang);
    if (extension == null) {
      extension = findExtension(lang);
      if (extension != null) {
        this.conf.setObject(lang, extension);
      }
View Full Code Here

    this.conf = conf;

    String pluginName = "urlfilter-suffix";
    Extension[] extensions = PluginRepository.get(conf).getExtensionPoint(URLFilter.class.getName()).getExtensions();
    for (int i = 0; i < extensions.length; i++) {
      Extension extension = extensions[i];
      if (extension.getDescriptor().getPluginId().equals(pluginName)) {
        attributeFile = extension.getAttribute("file");
        break;
      }
    }
    if (attributeFile != null && attributeFile.trim().equals("")) attributeFile = null;
    if (attributeFile != null) {
View Full Code Here

      throw new RuntimeException(URLFilter.X_POINT_ID+" not found.");

    Extension[] extensions = point.getExtensions();

    for (int i = 0; i < extensions.length; i++) {
      Extension extension = extensions[i];
      filter = (URLFilter)extension.getExtensionInstance();
      //LOG.info(filter.getClass().getName());
      if (filter.getClass().getName().equals(filterName)) {
        break;
      } else {
        filter = null;
View Full Code Here

   
    List<URLNormalizer> normalizers = new Vector<URLNormalizer>(extensions.size());

    Iterator<Extension> it = extensions.iterator();
    while (it.hasNext()) {
      Extension ext = it.next();
      URLNormalizer normalizer = null;
      try {
        // check to see if we've cached this URLNormalizer instance yet
        normalizer = (URLNormalizer) objectCache.getObject(ext.getId());
        if (normalizer == null) {
          // go ahead and instantiate it and then cache it
          normalizer = (URLNormalizer) ext.getExtensionInstance();
          objectCache.setObject(ext.getId(), normalizer);
        }
        normalizers.add(normalizer);
      } catch (PluginRuntimeException e) {
        e.printStackTrace();
        LOG.warn("URLNormalizers:PluginRuntimeException when "
                + "initializing url normalizer plugin "
                + ext.getDescriptor().getPluginId()
                + " instance in getURLNormalizers "
                + "function: attempting to continue instantiating plugins");
      }
    }
    return normalizers.toArray(new URLNormalizer[normalizers
View Full Code Here

      impls = new HashSet<String>(Arrays.asList(names));
    }
    Extension[] extensions = this.extensionPoint.getExtensions();
    HashMap<String, Extension> normalizerExtensions = new HashMap<String, Extension>();
    for (int i = 0; i < extensions.length; i++) {
      Extension extension = extensions[i];
      if (impls != null && !impls.contains(extension.getClazz()))
        continue;
      normalizerExtensions.put(extension.getClazz(), extension);
    }
    List<Extension> res = new ArrayList<Extension>();
    if (orders == null) {
      res.addAll(normalizerExtensions.values());
    } else {
      // first add those explicitly named in correct order
      for (int i = 0; i < orders.length; i++) {
        Extension e = normalizerExtensions.get(orders[i]);
        if (e != null) {
          res.add(e);
          normalizerExtensions.remove(orders[i]);
        }
      }
View Full Code Here

TOP

Related Classes of org.apache.nutch.plugin.Extension

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.