Package org.apache.nutch.plugin

Examples of org.apache.nutch.plugin.Extension


    String pluginName = "urlfilter-prefix";
    Extension[] extensions = PluginRepository.get(conf).getExtensionPoint(
        URLFilter.class.getName()).getExtensions();
    for (int i = 0; i < extensions.length; i++) {
      Extension extension = extensions[i];
      if (extension.getDescriptor().getPluginId().equals(pluginName)) {
        attributeFile = extension.getAttribute("file");
        break;
      }
    }
    if (attributeFile != null && attributeFile.trim().equals(""))
      attributeFile = null;
View Full Code Here


    // get the extensions for domain urlfilter
    String pluginName = "urlfilter-domain";
    Extension[] extensions = PluginRepository.get(conf).getExtensionPoint(
      URLFilter.class.getName()).getExtensions();
    for (int i = 0; i < extensions.length; i++) {
      Extension extension = extensions[i];
      if (extension.getDescriptor().getPluginId().equals(pluginName)) {
        attributeFile = extension.getAttribute("file");
        break;
      }
    }
   
    // handle blank non empty input
View Full Code Here

      throw new ParserNotFound(url, contentType);
    }

    parsers = new Vector<Parser>(parserExts.size());
    for (Iterator i=parserExts.iterator(); i.hasNext(); ){
      Extension ext = (Extension) i.next();
      Parser p = null;
      try {
        //check to see if we've cached this parser instance yet
        p = (Parser) objectCache.getObject(ext.getId());
        if (p == null) {
          // go ahead and instantiate it and then cache it
          p = (Parser) ext.getExtensionInstance();
          objectCache.setObject(ext.getId(),p);
        }
        parsers.add(p);
      } catch (PluginRuntimeException e) {
        if (LOG.isWarnEnabled()) {
          e.printStackTrace(LogUtil.getWarnStream(LOG));
          LOG.warn("ParserFactory:PluginRuntimeException when "
                 + "initializing parser plugin "
                 + ext.getDescriptor().getPluginId()
                 + " instance in getParsers "
                 + "function: attempting to continue instantiating parsers");
        }
      }
    }
View Full Code Here

   
    List<URLNormalizer> normalizers = new Vector<URLNormalizer>(extensions.size());

    Iterator<Extension> it = extensions.iterator();
    while (it.hasNext()) {
      Extension ext = it.next();
      URLNormalizer normalizer = null;
      try {
        // check to see if we've cached this URLNormalizer instance yet
        normalizer = (URLNormalizer) objectCache.getObject(ext.getId());
        if (normalizer == null) {
          // go ahead and instantiate it and then cache it
          normalizer = (URLNormalizer) ext.getExtensionInstance();
          objectCache.setObject(ext.getId(), normalizer);
        }
        normalizers.add(normalizer);
      } catch (PluginRuntimeException e) {
        e.printStackTrace();
        LOG.warn("URLNormalizers:PluginRuntimeException when "
                + "initializing url normalizer plugin "
                + ext.getDescriptor().getPluginId()
                + " instance in getURLNormalizers "
                + "function: attempting to continue instantiating plugins");
      }
    }
    return normalizers.toArray(new URLNormalizer[normalizers
View Full Code Here

   *         {@link PluginRuntimeException} instantiating the {@link Parser}.
   */
  public Parser getParserById(String id) throws ParserNotFound {

    Extension[] extensions = this.extensionPoint.getExtensions();
    Extension parserExt = null;

    ObjectCache objectCache = ObjectCache.get(conf);
   
    if (id != null) {
      parserExt = getExtension(extensions, id);
    }
    if (parserExt == null) {
      parserExt = getExtensionFromAlias(extensions, id);
    }

    if (parserExt == null) {
      throw new ParserNotFound("No Parser Found for id [" + id + "]");
    }
   
    // first check the cache          
    if (objectCache.getObject(parserExt.getId()) != null) {
      return (Parser) objectCache.getObject(parserExt.getId());

    // if not found in cache, instantiate the Parser   
    } else {
      try {
        Parser p = (Parser) parserExt.getExtensionInstance();
        objectCache.setObject(parserExt.getId(), p);
        return p;
      } catch (PluginRuntimeException e) {
        if (LOG.isWarnEnabled()) {
          LOG.warn("Canno initialize parser " +
                   parserExt.getDescriptor().getPluginId() +
                   " (cause: " + e.toString());
        }
        throw new ParserNotFound("Cannot init parser for id [" + id + "]");
      }
    }
View Full Code Here

      impls = new HashSet<String>(Arrays.asList(names));
    }
    Extension[] extensions = this.extensionPoint.getExtensions();
    HashMap<String, Extension> normalizerExtensions = new HashMap<String, Extension>();
    for (int i = 0; i < extensions.length; i++) {
      Extension extension = extensions[i];
      if (impls != null && !impls.contains(extension.getClazz()))
        continue;
      normalizerExtensions.put(extension.getClazz(), extension);
    }
    List<Extension> res = new ArrayList<Extension>();
    if (orders == null) {
      res.addAll(normalizerExtensions.values());
    } else {
      // first add those explicitly named in correct order
      for (int i = 0; i < orders.length; i++) {
        Extension e = normalizerExtensions.get(orders[i]);
        if (e != null) {
          res.add(e);
          normalizerExtensions.remove(orders[i]);
        }
      }
View Full Code Here

    // Collects, in extList, the extensions to use for contentType.
    // Two paths: when a list of plugin ids is available each id is resolved
    // individually; otherwise every extension is checked against its own
    // "contentType" attribute.
    List<Extension> extList = new ArrayList<Extension>();
    if (plugins != null) {
     
      for (String parsePluginId : plugins) {
       
        // first attempt: resolve by plugin id together with the content type
        Extension ext = getExtension(extensions, parsePluginId, contentType);
        // the extension returned may be null
        // that means that it was not enabled in the plugin.includes
        // nutch conf property, but it was mapped in the
        // parse-plugins.xml
        // file.
        // OR it was enabled in plugin.includes, but the plugin's plugin.xml
        // file does not claim that the plugin supports the specified mimeType
        // in either case, LOG the appropriate error message to WARN level
       
        if (ext == null) {
          //try to get it just by its pluginId
          ext = getExtension(extensions, parsePluginId);
         
          // the result of the second lookup decides which warning is logged;
          // a non-null result is still added to extList further down
          if (LOG.isWarnEnabled()) {
            if (ext != null) {
              // plugin was enabled via plugin.includes
              // its plugin.xml just doesn't claim to support that
              // particular mimeType
              LOG.warn("ParserFactory:Plugin: " + parsePluginId +
                       " mapped to contentType " + contentType +
                       " via parse-plugins.xml, but " + "its plugin.xml " +
                       "file does not claim to support contentType: " +
                       contentType);
            } else {
              // plugin wasn't enabled via plugin.includes
              LOG.warn("ParserFactory: Plugin: " + parsePluginId +
                       " mapped to contentType " + contentType +
                       " via parse-plugins.xml, but not enabled via " +
                       "plugin.includes in nutch-default.xml");                    
            }
          }
        }

        if (ext != null) {
          // add it to the list
          extList.add(ext);
        }
      }
     
    } else {
      // okay, there was no list of plugins defined for
      // this mimeType, however, there may be plugins registered
      // via the plugin.includes nutch conf property that claim
      // via their plugin.xml file to support this contentType
      // so, iterate through the list of extensions and collect
      // any extensions where this is the case.
      // NOTE(review): the original comment said a NotMappedParserException is
      // thrown here; the visible code only collects and logs the matches --
      // confirm against the remainder of the method.
     
      for (int i=0; i<extensions.length; i++) {
        // an extension declaring contentType "*" is inserted at the head of
        // the list, ahead of anything collected so far
        if ("*".equals(extensions[i].getAttribute("contentType"))){
          extList.add(0, extensions[i]);
        }
        // otherwise the declared contentType, passed through
        // escapeContentType, is used as a regular expression that the
        // requested contentType must match in full (String.matches)
        else if (extensions[i].getAttribute("contentType") != null
            && contentType.matches(escapeContentType(extensions[i].getAttribute("contentType")))) {
          extList.add(extensions[i]);
        }
      }
     
      if (extList.size() > 0) {
        if (LOG.isInfoEnabled()) {
          // build "[id1 - id2 - ...]" from the collected extension ids for
          // the info message below
          StringBuffer extensionsIDs = new StringBuffer("[");
          boolean isFirst = true;
          for (Extension ext : extList){
            if (!isFirst) extensionsIDs.append(" - ");
            else isFirst=false;
            extensionsIDs.append(ext.getId());
          }
        extensionsIDs.append("]");
          LOG.info("The parsing plugins: " + extensionsIDs.toString() +
                   " are enabled via the plugin.includes system " +
                   "property, and all claim to support the content type " +
View Full Code Here

      throw new ParserNotFound(url, contentType);
    }

    parsers = new Vector<Parser>(parserExts.size());
    for (Iterator i=parserExts.iterator(); i.hasNext(); ){
      Extension ext = (Extension) i.next();
      Parser p = null;
      try {
        //check to see if we've cached this parser instance yet
        p = (Parser) objectCache.getObject(ext.getId());
        if (p == null) {
          // go ahead and instantiate it and then cache it
          p = (Parser) ext.getExtensionInstance();
          objectCache.setObject(ext.getId(),p);
        }
        parsers.add(p);
      } catch (PluginRuntimeException e) {
        if (LOG.isWarnEnabled()) {
          LOG.warn("ParserFactory:PluginRuntimeException when "
                 + "initializing parser plugin "
                 + ext.getDescriptor().getPluginId()
                 + " instance in getParsers "
                 + "function: attempting to continue instantiating parsers");
        }
      }
    }
View Full Code Here

   *         {@link PluginRuntimeException} instantiating the {@link Parser}.
   */
  public Parser getParserById(String id) throws ParserNotFound {

    Extension[] extensions = this.extensionPoint.getExtensions();
    Extension parserExt = null;

    ObjectCache objectCache = ObjectCache.get(conf);
   
    if (id != null) {
      parserExt = getExtension(extensions, id);
    }
    if (parserExt == null) {
      parserExt = getExtensionFromAlias(extensions, id);
    }

    if (parserExt == null) {
      throw new ParserNotFound("No Parser Found for id [" + id + "]");
    }
   
    // first check the cache          
    if (objectCache.getObject(parserExt.getId()) != null) {
      return (Parser) objectCache.getObject(parserExt.getId());

    // if not found in cache, instantiate the Parser   
    } else {
      try {
        Parser p = (Parser) parserExt.getExtensionInstance();
        objectCache.setObject(parserExt.getId(), p);
        return p;
      } catch (PluginRuntimeException e) {
        if (LOG.isWarnEnabled()) {
          LOG.warn("Canno initialize parser " +
                   parserExt.getDescriptor().getPluginId() +
                   " (cause: " + e.toString());
        }
        throw new ParserNotFound("Cannot init parser for id [" + id + "]");
      }
    }
View Full Code Here

    // Collects, in extList, the extensions to use for contentType.
    // Two paths: when a list of plugin ids is available each id is resolved
    // individually; otherwise every extension is checked against its own
    // "contentType" attribute.
    List<Extension> extList = new ArrayList<Extension>();
    if (plugins != null) {
     
      for (String parsePluginId : plugins) {
       
        // first attempt: resolve by plugin id together with the content type
        Extension ext = getExtension(extensions, parsePluginId, contentType);
        // the extension returned may be null
        // that means that it was not enabled in the plugin.includes
        // nutch conf property, but it was mapped in the
        // parse-plugins.xml
        // file.
        // OR it was enabled in plugin.includes, but the plugin's plugin.xml
        // file does not claim that the plugin supports the specified mimeType
        // in either case, LOG the appropriate error message to WARN level
       
        if (ext == null) {
          //try to get it just by its pluginId
          ext = getExtension(extensions, parsePluginId);
         
          // the result of the second lookup decides which warning is logged;
          // a non-null result is still added to extList further down
          if (LOG.isWarnEnabled()) {
            if (ext != null) {
              // plugin was enabled via plugin.includes
              // its plugin.xml just doesn't claim to support that
              // particular mimeType
              LOG.warn("ParserFactory:Plugin: " + parsePluginId +
                       " mapped to contentType " + contentType +
                       " via parse-plugins.xml, but " + "its plugin.xml " +
                       "file does not claim to support contentType: " +
                       contentType);
            } else {
              // plugin wasn't enabled via plugin.includes
              LOG.warn("ParserFactory: Plugin: " + parsePluginId +
                       " mapped to contentType " + contentType +
                       " via parse-plugins.xml, but not enabled via " +
                       "plugin.includes in nutch-default.xml");                    
            }
          }
        }

        if (ext != null) {
          // add it to the list
          extList.add(ext);
        }
      }
     
    } else {
      // okay, there was no list of plugins defined for
      // this mimeType, however, there may be plugins registered
      // via the plugin.includes nutch conf property that claim
      // via their plugin.xml file to support this contentType
      // so, iterate through the list of extensions and collect
      // any extensions where this is the case.
      // NOTE(review): the original comment said a NotMappedParserException is
      // thrown here; the visible code only collects and logs the matches --
      // confirm against the remainder of the method.
     
      for (int i=0; i<extensions.length; i++) {
        // an extension declaring contentType "*" is inserted at the head of
        // the list, ahead of anything collected so far
        if ("*".equals(extensions[i].getAttribute("contentType"))){
          extList.add(0, extensions[i]);
        }
        // otherwise the declared contentType, passed through
        // escapeContentType, is used as a regular expression that the
        // requested contentType must match in full (String.matches)
        else if (extensions[i].getAttribute("contentType") != null
            && contentType.matches(escapeContentType(extensions[i].getAttribute("contentType")))) {
          extList.add(extensions[i]);
        }
      }
     
      if (extList.size() > 0) {
        if (LOG.isInfoEnabled()) {
          // build "[id1 - id2 - ...]" from the collected extension ids for
          // the info message below
          StringBuffer extensionsIDs = new StringBuffer("[");
          boolean isFirst = true;
          for (Extension ext : extList){
            if (!isFirst) extensionsIDs.append(" - ");
            else isFirst=false;
            extensionsIDs.append(ext.getId());
          }
        extensionsIDs.append("]");
          LOG.info("The parsing plugins: " + extensionsIDs.toString() +
                   " are enabled via the plugin.includes system " +
                   "property, and all claim to support the content type " +
View Full Code Here

TOP

Related Classes of org.apache.nutch.plugin.Extension

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.