Package org.apache.nutch.webapp

Source Code of org.apache.nutch.webapp.CacheManager

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nutch.webapp;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.Serializable;
import java.util.Properties;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.webapp.common.Search;
import org.apache.nutch.webapp.common.ServiceLocator;

import com.opensymphony.oscache.base.Cache;
import com.opensymphony.oscache.base.CacheEntry;
import com.opensymphony.oscache.base.EntryRefreshPolicy;
import com.opensymphony.oscache.base.NeedsRefreshException;
import com.opensymphony.oscache.general.GeneralCacheAdministrator;

/**
* This class is responsible for configuring the used cache and
* delivering cached Search objects.
*
* Configuration parameters can be overrided with default nutch
* configuration mechanism.
*
* Search Objects are compressed for smaller space requirements.
*/
public class CacheManager {
 
  public static class ByteBufferWrapper implements Serializable {
   
    private static final long serialVersionUID = 1L;
    byte[] contents;
   
    public ByteBufferWrapper(final byte[] contents){
      this.contents=contents;
    }
   
    public byte[] getContents(){
      return contents;
    }
   
  }

  static final Log LOG=LogFactory.getLog(CacheManager.class);
 
  static final String CACHE_KEY=CacheManager.class.getName();

  class NutchRefreshPolicy implements EntryRefreshPolicy {

    private static final long serialVersionUID = 1L;

    public boolean needsRefresh(CacheEntry arg0) {
      return false;
    }
  }
 
  EntryRefreshPolicy policy=new NutchRefreshPolicy();


  Cache cache;
  GeneralCacheAdministrator cacheadmin;
 
  protected CacheManager(Configuration conf){
   
    Properties p=new Properties();
   
    //use memory for caching
    boolean cacheMemory=conf.getBoolean("cache.memory", false);
    p.setProperty("cache.memory", Boolean.toString(cacheMemory));
   
    //the persistence class used
    String cachePersistenceClass=conf.get("cache.persistence.class","org.apache.nutch.cache.CustomDiskPersistenceListener");
    p.setProperty("cache.persistence.class", cachePersistenceClass);

    //where to store cache files (if file cache used)
    String cachePath=conf.get("cache.path", ".");
    p.setProperty("cache.path", cachePath);

    //cacacity of cache (how many entries)
    int cacheCapacity=conf.getInt("cache.capacity", 1000);
    p.setProperty("cache.capacity", Integer.toString(cacheCapacity));
   
    cacheadmin=new GeneralCacheAdministrator(p);
    cache=cacheadmin.getCache();
  }
 
  public synchronized static CacheManager getInstance(Configuration conf){
    CacheManager cache=(CacheManager)conf.getObject(CACHE_KEY);
   
    if(cache==null) {
      cache = new CacheManager(conf);
     
      conf.setObject(CACHE_KEY, cache);
    }
    return cache;
  }
 
  /**
   * Get Search object from cache
   * @param id key
   * @return
   * @throws NeedsRefreshException
   */
  public Search getSearch(String id, ServiceLocator locator) throws NeedsRefreshException  {
    Search search=null;
   
    ByteBufferWrapper w=(ByteBufferWrapper)cache.getFromCache(id);
    if(w!=null){
     

      try {
        long time=System.currentTimeMillis();
        ByteArrayInputStream is=new ByteArrayInputStream(w.getContents());
        GZIPInputStream gs = new GZIPInputStream(is);
        DataInputStream dis = new DataInputStream(gs);

        search = new Search(locator);
        search.readFields(dis);
        long delta=System.currentTimeMillis()-time;
       
        if(LOG.isDebugEnabled()){
          LOG.debug("Decompressing cache entry took: " + delta + "ms.");
        }

        search.init();
      } catch (IOException e) {
        LOG.info("Could not get cached object: " + e);
      }
    }
    return search;
  }

  /**
   * Put Search object in cache
   *
   * @param id key
   * @param search the search to cache
   */
  public void putSearch(String id, Search search){
    try {
      long time=System.currentTimeMillis();
      ByteArrayOutputStream bos=new ByteArrayOutputStream();
      GZIPOutputStream gzos=new GZIPOutputStream(bos);
      DataOutputStream oos=new DataOutputStream(gzos);
      search.write(oos);
      oos.flush();
      oos.close();
      gzos.close();
      long delta=System.currentTimeMillis()-time;
      ByteBufferWrapper wrap=new ByteBufferWrapper(bos.toByteArray());
      if(LOG.isDebugEnabled()){
        LOG.debug("Compressing cache entry took: " + delta + "ms.");
        LOG.debug("size: " + wrap.getContents().length + " bytes");
      }
      cache.putInCache(id, wrap);
    } catch (IOException e) {
      LOG.info("cannot store object in cache: " + e);
    }
  }

  public void cancelUpdate(String key) {
    cache.cancelUpdate(key);
  }
}
TOP

Related Classes of org.apache.nutch.webapp.CacheManager

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.