Package org.archive.util

Source Code of org.archive.util.ObjectIdentityBdbCache$PhantomEntry

/*
*  This file is part of the Heritrix web crawler (crawler.archive.org).
*
*  Licensed to the Internet Archive (IA) by one or more individual
*  contributors.
*
*  The IA licenses this file to You under the Apache License, Version 2.0
*  (the "License"); you may not use this file except in compliance with
*  the License.  You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
*  Unless required by applicable law or agreed to in writing, software
*  distributed under the License is distributed on an "AS IS" BASIS,
*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*  See the License for the specific language governing permissions and
*  limitations under the License.
*/

package org.archive.util;

import java.io.Closeable;
import java.io.File;
import java.io.Serializable;
import java.lang.ref.PhantomReference;
import java.lang.ref.Reference;
import java.lang.ref.ReferenceQueue;
import java.lang.ref.SoftReference;
import java.lang.reflect.Field;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.archive.bdb.KryoBinding;

import com.sleepycat.bind.EntryBinding;
import com.sleepycat.bind.serial.StoredClassCatalog;
import com.sleepycat.bind.tuple.TupleBinding;
import com.sleepycat.collections.StoredSortedMap;
import com.sleepycat.je.Database;
import com.sleepycat.je.DatabaseConfig;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.Environment;

/**
* A BDB JE backed object cache.
*
* Soft references to previously-instantiated objects are held so that
* unless/until an object is garbage collected, subsequent get()s will
* return the exact same object. (If all outside references are lost,
* when the soft reference is broken, the object state -- still
* accessible to this class via reflective access to a phantom
* referent -- is flushed to disk. The next get() will reconstitute a new
* object from the disk state.)
* <p/>
* The backing disk is only guaranteed to be up-to-date after a flush
* of all in-memory values to disk, as can be forced by sync().
* <p/>
* To ensure that changes/mutations to values in this map are coherent and
* consistent at the application level, it is assumed that the application
* level only mutates values that are in this map and does not retain references
* to values longer than necessary.  This allows mappings to be persisted
* during GC without explicit transactions or write operations.
* <p/>
* Based on the earlier CachedBdbMap.
* <p/>
*
* @author John Erik Halse
* @author stack
* @author gojomo
* @author paul baclace (conversion to ConcurrentMap)
*/
public class ObjectIdentityBdbCache<V extends IdentityCacheable>
implements ObjectIdentityCache<V>, Closeable, Serializable {
    private static final long serialVersionUID = 1L;
    private static final Logger logger =
        Logger.getLogger(ObjectIdentityBdbCache.class.getName());

    /**
     * The BDB JE database used for this instance. Opened by initialize()
     * and reset to null by close(); other methods treat null as "closed".
     */
    protected transient Database db;

    /**
     * In-memory map of new/recent/still-referenced-elsewhere instances,
     * keyed like the disk map. Created by initialize().
     */
    protected transient ConcurrentHashMap<String,SoftEntry<V>> memMap;
    /**
     * Queue handed to every SoftEntry on creation; pageOutStaleEntries()
     * polls it to find entries whose values need writing to disk.
     */
    protected transient ReferenceQueue<V> refQueue;

    /** The Collection view of the BDB JE database used for this instance. */
    protected transient StoredSortedMap<String, V> diskMap;

    /**
     * Number of keys: seeded from diskMap.size() in initialize(),
     * incremented when getOrUse() stores a newly supplied value, and
     * reported by size().
     */
    protected AtomicLong count;
   
    //
    // USAGE STATS
    //
    /** Count of times we got an object from in-memory cache */
    private AtomicLong cacheHit = new AtomicLong(0);
    /** Count of times the {@link #getOrUse} method was called. */
    private AtomicLong countOfGets = new AtomicLong(0);
    /** Count of every time disk-based map provided non-null object */
    private AtomicLong diskHit = new AtomicLong(0);
    /** Count of times Supplier was used for new object */
    private AtomicLong supplierUsed = new AtomicLong(0);
    /** count of expunge put() to BDB (implies disk) */
    private AtomicLong expungeStatsDiskPut = new AtomicLong(0);
    /**
     * count of {@link #sync()} use.
     * NOTE(review): unlike the other counters this one is transient, so
     * it is presumably not carried across serialization -- confirm.
     */
    transient private AtomicLong useStatsSyncUsed = new AtomicLong(0);
   
    /** Reference to the Reference#referent Field. */
    protected static Field referentField;
    static {
        // We need access to the referent field in the PhantomReference.
        // For more on this trick, see
        //
        // http://www.javaspecialists.co.za/archive/Issue098.html and for
        // discussion:
        // http://www.theserverside.com/tss?service=direct/0/NewsThread/threadViewer.markNoisy.link&sp=l29865&sp=l146901
        try {
            referentField = Reference.class.getDeclaredField("referent");
            referentField.setAccessible(true);
        } catch (SecurityException e) {
            throw new RuntimeException(e);
        } catch (NoSuchFieldException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Creates an unconnected instance. You must call
     * {@link #initialize(Environment, String, Class, StoredClassCatalog)}
     * to finish construction. Construction is two-stepped to support
     * reconnecting a deserialized instance with its backing bdbje
     * database.
     */
    public ObjectIdentityBdbCache() {
        super();
    }
   
    /**
     * Call this method when you have an instance when you used the
     * default constructor or when you have a deserialized instance that you
     * want to reconnect with an extant bdbje environment.  Do not
     * call this method if you used the
     * {@link #CachedBdbMap(File, String, Class, Class)} constructor.
     * @param env
     * @param keyClass
     * @param valueClass
     * @param classCatalog
     * @throws DatabaseException
     */
    public void initialize(final Environment env, String dbName,
            final Class valueClass, final StoredClassCatalog classCatalog)
    throws DatabaseException {
        // TODO: initial capacity should be related to number of seeds, max depth, max docs
        this.memMap = new ConcurrentHashMap<String,SoftEntry<V>>(
                                                            8192, // initial capacity
                                                            0.9f, // acceptable load factor
                                                            64 // est. number of concurrent threads
                                                            );
        this.refQueue = new ReferenceQueue<V>();
        canary = new SoftReference<LowMemoryCanary>(new LowMemoryCanary());
       
        this.db = openDatabase(env, dbName);
        this.diskMap = createDiskMap(this.db, classCatalog, valueClass);
        this.count = new AtomicLong(diskMap.size());
    }

    @SuppressWarnings("unchecked")
    protected StoredSortedMap<String, V> createDiskMap(Database database,
            StoredClassCatalog classCatalog, Class valueClass) {
        EntryBinding keyBinding = TupleBinding.getPrimitiveBinding(String.class);
        EntryBinding valueBinding = TupleBinding.getPrimitiveBinding(valueClass);
        if(valueBinding == null) {
            valueBinding =
                new KryoBinding<V>(valueClass);
//                new SerialBinding(classCatalog, valueClass);
//                new BenchmarkingBinding<V>(new EntryBinding[] {
//                      new KryoBinding<V>(valueClass),                  
//                      new RecyclingSerialBinding<V>(classCatalog, valueClass),
//                  }, valueClass);
        }
        return new StoredSortedMap<String,V>(database, keyBinding, valueBinding, true);
    }

    protected Database openDatabase(final Environment environment,
            final String dbName) throws DatabaseException {
        DatabaseConfig dbConfig = new DatabaseConfig();
        dbConfig.setTransactional(false);
        dbConfig.setAllowCreate(true);
        dbConfig.setDeferredWrite(true);
        return environment.openDatabase(null, dbName, dbConfig);
    }

    /* (non-Javadoc)
     * @see org.archive.util.ObjectIdentityCache#close()
     */
    public synchronized void close() {
        // Close out my bdb db.
        if (this.db != null) {
            try {
                sync();
                this.db.sync();
                this.db.close();
            } catch (DatabaseException e) {
                logger.log(Level.WARNING,"problem closing ObjectIdentityBdbCache",e);
            } finally {
                this.db = null;
            }
        }
    }

    protected void finalize() throws Throwable {
        close();
        super.finalize();
    }

    /* (non-Javadoc)
     * @see org.archive.util.ObjectIdentityCache#get(java.lang.String)
     */
    public V get(final String key) {
        return getOrUse(key,null);
    }
   
    /* (non-Javadoc)
     * @see org.archive.util.ObjectIdentityCache#get(java.lang.String, org.archive.util.ObjectIdentityBdbCache)
     */
    public V getOrUse(final String key, Supplier<V> supplierOrNull) {
        countOfGets.incrementAndGet();
       
        if (countOfGets.get() % 10000 == 0) {
            logCacheSummary();
        }
       
        // check mem cache
        SoftEntry<V> entry = memMap.get(key);
        if(entry != null) {
            V val = entry.get();
            if(val != null) {
                // the concurrent garden path: in mem, valid
                cacheHit.incrementAndGet();
                val.setIdentityCache(this);
                return val;
            }
        }
       
        // everything in other difficult cases happens inside this block
        synchronized(this) {
            // recheck mem cache -- if another thread beat us into sync
            // block and already filled the key
            entry = memMap.get(key);
            if(entry != null) {
                V val = entry.get();
                if(val != null) {
                    cacheHit.incrementAndGet();
                    val.setIdentityCache(this);
                    return val;
                }
            }
            // persist to disk all ref-enqueued stale (soft-ref-cleared) entries now
            pageOutStaleEntries();
            // and catch if this exact entry not yet ref-enqueued
            if(memMap.get(key)!=null) {
                pageOutStaleEntry(entry);
                if(memMap.get(key)!=null) {
                    logger.log(Level.SEVERE,"nulled key "+key+" not paged-out", new Exception());
                }
            }
           
            // check disk
            V valDisk = (V) diskMap.get(key);
            if(valDisk==null) {
                // never yet created, consider creating
                if(supplierOrNull==null) {
                    return null;
                }
                // create using provided Supplier
                valDisk = supplierOrNull.get();
                supplierUsed.incrementAndGet();
                // putting initial value directly into diskMap
                // (rather than just the memMap until page-out)
                // ensures diskMap.keySet() provides complete view
                V prevVal = diskMap.putIfAbsent(key, valDisk);
                count.incrementAndGet();
                if(prevVal!=null) {
                    // ERROR: diskMap modification since previous
                    // diskMap.get() should be impossible
                    logger.log(Level.SEVERE,"diskMap modified outside synchronized block?");
                }
            } else {
                diskHit.incrementAndGet();
            }

            // keep new val in memMap
            SoftEntry<V> newEntry = new SoftEntry<V>(key, valDisk, refQueue);
            SoftEntry<V> prevVal = memMap.putIfAbsent(key, newEntry);
            if(prevVal != null) {
                // ERROR: memMap modification since previous
                // memMap.get() should be impossible
                logger.log(Level.SEVERE,"memMap modified outside synchronized block?", new Exception());
            }
            valDisk.setIdentityCache(this);
            return valDisk;
        }
    }

    /* (non-Javadoc)
     * @see org.archive.util.ObjectIdentityCache#keySet()
     */
    public Set<String> keySet() {
        return diskMap.keySet();
    }
   
    /**
     * Summary to log, if at FINE level
     */
    private void logCacheSummary() {
        if (logger.isLoggable((Level.FINE))) {
            logger.fine(composeCacheSummary());
        }
    }
   
    protected String composeCacheSummary() {
        long totalHits = cacheHit.get() + diskHit.get();
        if (totalHits < 1) {
            return "";
        }
        long cacheHitPercent
                = (cacheHit.get() * 100) / totalHits;
        StringBuilder sb = new StringBuilder(120);
        sb.append("DB name:")
          .append(getDatabaseName())
          .append(", ")
          .append(" hit%: ")
          .append(cacheHitPercent)
          .append("%, gets=")
          .append(countOfGets.get())
          .append(" memHits=")
          .append(cacheHit.get())
          .append(" diskHits=")
          .append(diskHit.get())
          .append(" supplieds=")
          .append(supplierUsed.get())
          .append(" expungePuts=")
          .append(expungeStatsDiskPut.get())
          .append(" syncs=")
          .append(useStatsSyncUsed.get());
        return sb.toString();
    }

    /* (non-Javadoc)
     * @see org.archive.util.ObjectIdentityCache#size()
     */
    public int size() {
        if(db==null) {
            return 0;
        }
        return (int) count.get();
    }
   
    protected String getDatabaseName() {
        String name = "DbName-Lookup-Failed";
        try {
            if (this.db != null) {
                name = this.db.getDatabaseName();
            }
        } catch (DatabaseException e) {
            // Ignore.
        }
        return name;
    }
   
    /**
     * Sync all in-memory map entries to backing disk store.
     */
    public synchronized void sync() {
        String dbName = null;
        // Sync. memory and disk.
        useStatsSyncUsed.incrementAndGet();
        long startTime = 0;
        if (logger.isLoggable(Level.FINE)) {
            dbName = getDatabaseName();
            startTime = System.currentTimeMillis();
            logger.fine(dbName + " start sizes: disk " + this.diskMap.size() +
                ", mem " + this.memMap.size());
        }
       
        for (String key : this.memMap.keySet()) {
            SoftEntry<V> entry = memMap.get(key);
            if (entry != null) {
                // Get & hold so not cleared pre-return.
                V value = entry.get();
                if (value != null) {
                    expungeStatsDiskPut.incrementAndGet();
                    this.diskMap.put(key, value); // unchecked cast
                }
            }
        }
        pageOutStaleEntries();
       
        // force sync of deferred-writes
        try {
            this.db.sync();
        } catch (DatabaseException e) {
            throw new RuntimeException(e);
        }
       
        if (logger.isLoggable(Level.FINE)) {
            logger.fine(dbName + " sync took " +
                (System.currentTimeMillis() - startTime) + "ms. " +
                "Finish sizes: disk " +
                this.diskMap.size() + ", mem " + this.memMap.size());
        }
    }
    /**
     * Intentionally a no-op in this implementation.
     *
     * @param key ignored
     */
    @Override
    public void dirtyKey(String key) {
        // do nothing, because our soft/phantom reference trickery is
        // supposed to ensure sync-to-persistence if/when dereferenced
        // and collected
    }

    /** An incremental, poll-based expunger.
     *
     * Package-protected for unit-test visibility.
     */
    @SuppressWarnings("unchecked")
    protected synchronized void pageOutStaleEntries() {
        int c = 0;
        long startTime = System.currentTimeMillis();
        for(SoftEntry<V> entry; (entry = (SoftEntry<V>)refQueue.poll()) != null;) {
            pageOutStaleEntry(entry);
            c++;
        }
        if (c > 0 && logger.isLoggable(Level.FINER)) {
            long endTime = System.currentTimeMillis();
            try {
                logger.finer("DB: " + db.getDatabaseName() + ",  Expunged: "
                        + c + ", Diskmap size: " + diskMap.size()
                        + ", Cache size: " + memMap.size()
                        + ", in "+(endTime-startTime)+"ms");
            } catch (DatabaseException e) {
                logger.log(Level.FINER,"exception while logging",e);
            }
        }
    }
   
    /**
     * Expunge an entry from memMap while updating diskMap.
     * <p/>
     * The value is recovered from the entry's phantom reference, written
     * to diskMap under the entry's key, and only then is the entry
     * removed from memMap and the phantom cleared. The call is a no-op
     * when memMap no longer maps the key to this exact entry.
     *
     * @param entry a SoftEntry<V>, either polled from refQueue by
     *        pageOutStaleEntries() or found stale by getOrUse()
     */
   synchronized private void pageOutStaleEntry(SoftEntry<V> entry) {
        PhantomEntry<V> phantom = entry.phantom;
       
        // Still in memMap? if not, was paged-out by earlier direct access
        // before placed into reference-queue; just return
        if (memMap.get(phantom.key) != entry) { // NOTE: intentional identity compare
            return;
        }
       
        // recover hidden value (the soft reference no longer yields it;
        // the phantom still holds it, reached reflectively)
        V phantomValue = phantom.doctoredGet();

        // Expected value present? (should be; only clear is at end of
        // this method, after entry removal from memMap)
        if(phantomValue == null) {
            logger.log(Level.WARNING,"unexpected null phantomValue", new Exception());
            return; // nothing to do
        }
       
        // given instance entry still in memMap;
        // we have the key and phantom Value,
        // the diskMap can be updated.
        diskMap.put(phantom.key, phantomValue); // disk first, so the value is never in neither map
        expungeStatsDiskPut.incrementAndGet();
       
        //  remove memMap entry (two-argument remove: only if still mapped to this entry)
        boolean removed = memMap.remove(phantom.key, entry);
        if(!removed) {
            logger.log(Level.WARNING,"expunge memMap.remove() ineffective",new Exception());
        }
        phantom.clear(); // truly allows GC of unreferenced V object
    }
   
    private static class PhantomEntry<V> extends PhantomReference<V> {
        protected final String key;

        public PhantomEntry(String key, V referent) {
            super(referent, null);
            this.key = key;
        }

        /**
         * @return Return the referent. The contract for {@link #get()}
         * always returns a null referent.  We've cheated and doctored
         * PhantomReference to return the actual referent value.  See notes
         * at {@link #referentField};
         */
        @SuppressWarnings("unchecked")
        final public V doctoredGet() {
            try {
                // Here we use the referentField saved off on static
                // initialization of this class to get at this References'
                // private referent field.
                return (V) referentField.get(this);
            } catch (IllegalAccessException e) {
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * SoftReference cache entry.
     *
     * A PhantomReference is used to hold the key and value as a last
     * chance before GC hook that can effect the update of diskMap.
     * <p/>
     * Entries are not recycled.
     */
    private static class SoftEntry<V> extends SoftReference<V> {
        PhantomEntry<V> phantom;

        public SoftEntry(String key, V referent, ReferenceQueue<V> q) {
            super(referent, q);
            this.phantom = new PhantomEntry<V>(key, referent);
        }

        public V get() {
            // ensure visibility
            synchronized (this) {
                return super.get();
            }
        }

        public String toString() {
            if (phantom != null) {
                return "SoftEntry(key=" + phantom.key + ")";
            } else {
                return "SoftEntry()";
            }
        }
    }

    //
    // Crude, probably unreliable/fragile but harmless mechanism to
    // trigger expunge of cleared SoftReferences in low-memory
    // conditions even without any of the other get/put triggers.
    //
   
    protected transient SoftReference<LowMemoryCanary> canary;
    protected class LowMemoryCanary {
        /** When collected/finalized -- as should be expected in
         *  low-memory conditions -- trigger an expunge and a
         *  new 'canary' insertion. */
        public void finalize() {
            ObjectIdentityBdbCache.this.pageOutStaleEntries();
//            System.err.println("CANARY KILLED - "+ObjectIdentityBdbCache.this);
            // only install new canary if map still 'open' with db reference
            if(ObjectIdentityBdbCache.this.db !=null) {
                ObjectIdentityBdbCache.this.canary =
                    new SoftReference<LowMemoryCanary>(new LowMemoryCanary());
            } else {
                ObjectIdentityBdbCache.this.canary = null;
            }
        }
    }
}
TOP

Related Classes of org.archive.util.ObjectIdentityBdbCache$PhantomEntry

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.