// Source code of edu.brown.hstore.estimators.markov.MarkovEstimator

package edu.brown.hstore.estimators.markov;


import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentHashMap;


import org.apache.commons.collections15.keyvalue.MultiKey;
import org.apache.log4j.Logger;
import org.voltdb.CatalogContext;
import org.voltdb.catalog.Procedure;
import org.voltdb.catalog.Statement;
import org.voltdb.exceptions.ServerFaultException;
import org.voltdb.utils.EstTime;
import org.voltdb.utils.Pair;


import edu.brown.catalog.CatalogKey;
import edu.brown.graphs.GraphvizExport;
import edu.brown.hstore.Hstoreservice.Status;
import edu.brown.hstore.estimators.EstimatorState;
import edu.brown.hstore.estimators.TransactionEstimator;
import edu.brown.hstore.txns.TransactionUtil;
import edu.brown.interfaces.DebugContext;
import edu.brown.logging.LoggerUtil;
import edu.brown.logging.LoggerUtil.LoggerBoolean;
import edu.brown.markov.MarkovEdge;
import edu.brown.markov.MarkovGraph;
import edu.brown.markov.MarkovGraphTimes;
import edu.brown.markov.MarkovUtil;
import edu.brown.markov.MarkovVertex;
import edu.brown.markov.containers.MarkovGraphsContainer;
import edu.brown.pools.TypedObjectPool;
import edu.brown.pools.TypedPoolableObjectFactory;
import edu.brown.profilers.MarkovEstimatorProfiler;
import edu.brown.profilers.ProfileMeasurement;
import edu.brown.utils.CollectionUtil;
import edu.brown.utils.ParameterMangler;
import edu.brown.utils.PartitionEstimator;
import edu.brown.utils.PartitionSet;
import edu.brown.utils.StringUtil;
import edu.brown.workload.QueryTrace;
import edu.brown.workload.TransactionTrace;


/**
 * Markov Model-based Transaction Estimator
 * @author pavlo
 */
public class MarkovEstimator extends TransactionEstimator {
    // log4j logger used for all messages emitted by this class
    private static final Logger LOG = Logger.getLogger(MarkovEstimator.class);
    // Flag checked (via debug.val) before building any LOG.debug message
    private static final LoggerBoolean debug = new LoggerBoolean();
    // Flag checked (via trace.val) before building any LOG.trace message
    private static final LoggerBoolean trace = new LoggerBoolean();
    static {
        // Registers both flags against LOG. Presumably this keeps them in sync with
        // the logger's configured level -- TODO confirm in LoggerUtil.
        LoggerUtil.attachObserver(LOG, debug, trace);
    }
    
    // ----------------------------------------------------------------------------
    // STATIC DATA MEMBERS
    // ----------------------------------------------------------------------------
    
    /**
     * The amount of change in visitation of vertices we would tolerate before we need
     * to recompute the graph.
     * This value is handed to MarkovGraph.shouldRecompute() from executeQueries()
     * whenever graph recomputes have been enabled on this estimator.
     * TODO (pavlo): Saurya says: Should this be in MarkovGraph?
     */
    private static final double RECOMPUTE_TOLERANCE = (double) 0.5;


    // ----------------------------------------------------------------------------
    // DATA MEMBERS
    // ----------------------------------------------------------------------------
    
    // Catalog handle; used to build the parameter manglers and the object pool factories
    private final CatalogContext catalogContext;
    // The MarkovGraphs that estimates are drawn from. May be null, in which case
    // startTransactionImpl() never produces a state.
    private final MarkovGraphsContainer markovs;
    // Per-vertex timing samples; only written to when enable_recomputes is set
    private final MarkovGraphTimes markovTimes = new MarkovGraphTimes();


    // Pool of reusable MarkovPathEstimators borrowed/returned by estimatePath()
    private final TypedObjectPool<MarkovPathEstimator> pathEstimatorsPool;
    // Pool of reusable MarkovEstimatorStates borrowed in startTransactionImpl()
    // and returned in destroyEstimatorState()
    private final TypedObjectPool<MarkovEstimatorState> statesPool;
    
    /**
     * Cache of one estimated vertex path per MarkovGraph. An entry is stored by
     * completeTransaction() the first time a txn finishes with a valid initial
     * estimate for that graph, and is reused by estimatePath() when path caching
     * is enabled. Writes are guarded by synchronizing on the map itself.
     */
    private final Map<MarkovGraph, List<MarkovVertex>> cached_paths = new HashMap<MarkovGraph, List<MarkovVertex>>();
    
    /**
     * For a given vertex, maintain a map to possible future vertices
     * (the edge/vertex pair reached at the end of a query batch).
     */
    private final Map<MarkovVertex, ConcurrentHashMap<MultiKey<String>, Pair<MarkovEdge, MarkovVertex>>> cache_batchEnd;
    
    // When true, vertex timings are recorded and graphs may recalculate their
    // probabilities at runtime. Off until enableGraphRecomputes() is called.
    private transient boolean enable_recomputes = false;
    
    /**
     * If we're using the TransactionEstimator, then we need to convert all 
     * primitive array ProcParameters into object arrays...
     * ProcedureId -> ParameterMangler
     * Slots for system procedures are left null.
     */
    private final ParameterMangler manglers[];
    
    // Timing profiler; null unless hstore_conf.site.markov_profiling is enabled
    private final MarkovEstimatorProfiler profiler;
    
    // ----------------------------------------------------------------------------
    // CONSTRUCTORS
    // ----------------------------------------------------------------------------


    /**
     * Constructor
     * @param p_estimator
     * @param mappings
     * @param markovs
     */
    public MarkovEstimator(CatalogContext catalogContext, PartitionEstimator p_estimator, MarkovGraphsContainer markovs) {
        super(p_estimator);
        this.catalogContext = catalogContext;
        this.markovs = markovs;
        this.cache_batchEnd = new HashMap<MarkovVertex, ConcurrentHashMap<MultiKey<String>,Pair<MarkovEdge,MarkovVertex>>>();
        
        if (this.markovs != null && this.markovs.getHasher() == null) 
            this.markovs.setHasher(this.hasher);
        
        // Create all of our parameter manglers
        this.manglers = new ParameterMangler[this.catalogContext.procedures.size() + 1];
        for (Procedure catalog_proc : this.catalogContext.procedures) {
            if (catalog_proc.getSystemproc()) continue;
            this.manglers[catalog_proc.getId()] = ParameterMangler.singleton(catalog_proc);
        } // FOR
        
        if (debug.val)
            LOG.debug("Creating MarkovPathEstimator Object Pool");
        TypedPoolableObjectFactory<MarkovPathEstimator> m_factory = new MarkovPathEstimator.Factory(this.catalogContext, this.p_estimator);
        this.pathEstimatorsPool = new TypedObjectPool<MarkovPathEstimator>(m_factory, hstore_conf.site.pool_pathestimators_idle);
        
        if (debug.val)
            LOG.debug("Creating MarkovEstimatorState Object Pool");
        TypedPoolableObjectFactory<MarkovEstimatorState> s_factory = new MarkovEstimatorState.Factory(this.catalogContext);
        int num_idle = (int)(hstore_conf.site.network_incoming_limit_txns * hstore_conf.site.pool_scale_factor); 
        this.statesPool = new TypedObjectPool<MarkovEstimatorState>(s_factory, num_idle);
        
        if (hstore_conf.site.markov_profiling) {
            this.profiler = new MarkovEstimatorProfiler();
        } else {
            this.profiler = null;
        }
    }


    // ----------------------------------------------------------------------------
    // DATA MEMBER METHODS
    // ----------------------------------------------------------------------------


    public void enableGraphRecomputes() {
       this.enable_recomputes = true;
    }
    public MarkovGraphsContainer getMarkovGraphsContainer() {
        return (this.markovs);
    }
    public MarkovGraphTimes getMarkovGraphTimes() {
        return (this.markovTimes);
    }
    
    // ----------------------------------------------------------------------------
    // RUNTIME METHODS
    // ----------------------------------------------------------------------------


    @SuppressWarnings("unchecked")
    @Override
    public MarkovEstimatorState startTransactionImpl(Long txn_id, int base_partition, Procedure catalog_proc, Object[] args) {
        long timestamp = -1l;
        if (this.profiler != null) timestamp = ProfileMeasurement.getTime();
        
        ParameterMangler mangler = this.manglers[catalog_proc.getId()];
        if (mangler != null) args = mangler.convert(args);
        
        assert (catalog_proc != null);
        long start_time = EstTime.currentTimeMillis();
        if (debug.val)
            LOG.debug(String.format("%s - Starting transaction estimation [partition=%d]",
                      TransactionUtil.formatTxnName(catalog_proc, txn_id), base_partition));


        // If we don't have a graph for this procedure, we should probably just return null
        // This will be the case for all sysprocs
        if (this.markovs == null) return (null);
        MarkovGraph markov = this.markovs.getFromParams(txn_id, base_partition, args, catalog_proc);
        if (markov == null) {
            if (debug.val)
                LOG.debug(String.format("%s - No MarkovGraph is available for transaction",
                          TransactionUtil.formatTxnName(catalog_proc, txn_id)));
            if (this.profiler != null) this.profiler.start_time.appendTime(timestamp);
            return (null);
        }
        
        if (trace.val)
            LOG.trace(String.format("%s - Creating new MarkovEstimatorState",
                      TransactionUtil.formatTxnName(catalog_proc, txn_id)));
        MarkovEstimatorState state = null;
        try {
            state = (MarkovEstimatorState)statesPool.borrowObject();
            assert(state.isInitialized() == false);
            state.init(txn_id, base_partition, markov, args, start_time);
        } catch (Throwable ex) {
            throw new RuntimeException(ex);
        }
        assert(state.isInitialized()) : 
            "Unexpectted uninitialized MarkovEstimatorState\n" + state;
        
        MarkovVertex start = markov.getStartVertex();
        assert(start != null) : "The start vertex is null. This should never happen!";
        MarkovEstimate initialEst = state.createNextEstimate(start, true);
        this.estimatePath(state, initialEst, catalog_proc, args);
        
        if (debug.val) {
            String txnName = TransactionUtil.formatTxnName(catalog_proc, txn_id);
            LOG.debug(String.format("%s - Initial MarkovEstimate\n%s", txnName, initialEst));
            List<MarkovVertex> path = initialEst.getMarkovPath();
            if (path.isEmpty()) {
                LOG.debug(String.format("%s - Initial empty path for txn is empty because the graph is new",
                          txnName));
            } else {
                LOG.trace(String.format("%s - Estimated Path [length=%d]\n%s",
                          txnName, path.size(),
                          StringUtil.join("\n----------------------\n", path)));
            }
        }
        
        // Update EstimatorState.prefetch any time we transition to a MarkovVertex where the
        // underlying Statement catalog object was marked as prefetchable
        // Do we want to put this traversal above?
        if (hstore_conf.site.exec_prefetch_queries) {
            for (MarkovVertex vertex : initialEst.getMarkovPath()) {
                Statement statement = (Statement) vertex.getCatalogItem();
                if (statement.getPrefetchable()) {
                    if (debug.val)
                        LOG.debug(String.format("%s - Checking whether we can prefetch %s on partitions %s",
                                 TransactionUtil.formatTxnName(catalog_proc, txn_id),
                                 statement.fullName(), vertex.getPartitions()));
                    if (vertex.getPartitions().isEmpty() == false && vertex.getPartitions().get() != base_partition) {
                        state.addPrefetchableStatement(vertex.getCountedStatement());
                    }
                }
            } // FOR
        }
        
        // We want to add the estimate to the state down here after we have initialized
        // everything. This prevents other threads from accessing it before we have
        // initialized it properly.
        state.addInitialEstimate(initialEst);
        
        if (this.profiler != null) this.profiler.start_time.appendTime(timestamp);
        return (state);
    }
    
    @SuppressWarnings("unchecked")
    @Override
    public MarkovEstimate executeQueries(EstimatorState s, Statement catalog_stmts[], PartitionSet partitions[]) {
        long timestamp = -1l;
        if (this.profiler != null) timestamp = ProfileMeasurement.getTime();
        
        MarkovEstimatorState state = (MarkovEstimatorState)s; 
        if (debug.val)
            LOG.debug(String.format("Processing %d queries for txn #%d",
                      catalog_stmts.length, state.getTransactionId()));
        int batch_size = catalog_stmts.length;
        
        // If we get here, then we should definitely have a MarkovGraph
        MarkovGraph markov = state.getMarkovGraph();
        assert(markov != null);
            
        MarkovVertex current = state.getCurrent();
        PartitionSet touchedPartitions = state.getTouchedPartitions();
        MarkovVertex next_v = null;
        MarkovEdge next_e = null;
        Statement last_stmt = null;
        int stmt_idxs[] = null;


        // We can cache what the path is based on the first and last query in the batch
        // We only want to do this for batches that are large enough.
        if (hstore_conf.site.markov_endpoint_caching &&
                batch_size >= hstore_conf.site.markov_batch_caching_min) {
            assert(current != null);
            if (debug.val)
                LOG.debug("Attempting cache look-up for last statement in batch: " + Arrays.toString(catalog_stmts));
            
            state.cache_last_partitions.clear();
            state.cache_past_partitions.clear();
            
            PartitionSet last_partitions;
            stmt_idxs = new int[batch_size];
            for (int i = 0; i < batch_size; i++) {
                last_stmt = catalog_stmts[i];
                last_partitions = partitions[batch_size - 1];
                stmt_idxs[i] = state.updateQueryInstanceCount(last_stmt);
                if (i+1 != batch_size) {
                    state.cache_past_partitions.addAll(last_partitions);
                }
                else {
                    state.cache_last_partitions.addAll(last_partitions);
                }
            } // FOR
            
            Pair<MarkovEdge, MarkovVertex> pair = this.getCachedBatchEnd(current,
                                                                         last_stmt,
                                                                         stmt_idxs[batch_size-1],
                                                                         state.cache_last_partitions,
                                                                         state.cache_past_partitions);
            if (pair != null) {
                next_e = pair.getFirst();
                assert(next_e != null);
                next_v = pair.getSecond();
                assert(next_v != null);
                if (debug.val)
                    LOG.debug(String.format("Got cached batch end for %s: %s -> %s",
                              markov, current, next_v));
                
                // Update the counters and other info for the next vertex and edge
                if (this.enable_recomputes) {
                    this.markovTimes.addInstanceTime(next_v,
                                                     state.getTransactionId(),
                                                     state.getExecutionTimeOffset());
                }
                
                // Update the state information
                state.setCurrent(next_v, next_e);
                touchedPartitions.addAll(state.cache_last_partitions);
                touchedPartitions.addAll(state.cache_past_partitions);
            }
        }
        
        // Roll through the Statements in this batch and move the current vertex
        // for the txn's State handle along the path in the MarkovGraph
        if (next_v == null) {
            for (int i = 0; i < batch_size; i++) {
                int queryCount = (stmt_idxs != null ? stmt_idxs[i] : -1);
                this.consume(state, markov, catalog_stmts[i], partitions[i], queryCount);
                if (stmt_idxs == null) touchedPartitions.addAll(partitions[i]);
            } // FOR
            
            // Update our cache if we tried and failed before
            if (hstore_conf.site.markov_endpoint_caching && stmt_idxs != null) {
                if (debug.val)
                    LOG.debug(String.format("Updating cache batch end for %s: %s -> %s",
                              markov, current, state.getCurrent()));
                this.addCachedBatchEnd(current,
                                       CollectionUtil.last(state.actual_path_edges),
                                       state.getCurrent(),
                                       last_stmt,
                                       stmt_idxs[batch_size-1],
                                       state.cache_past_partitions,
                                       state.cache_last_partitions);
            }
        }


        // 2012-10-17: This is kind of funky because we have to populate the
        // probabilities for the MarkovEstimate here, whereas for the initial estimate 
        // we did it inside of the MarkovPathEstimator
        MarkovEstimate estimate = state.createNextEstimate(state.getCurrent(), false);
        assert(estimate != null);
        Procedure catalog_proc = markov.getProcedure();
        Object procArgs[] = state.getProcedureParameters();
        this.estimatePath(state, estimate, catalog_proc, procArgs);
        
        if (debug.val)
            LOG.debug(String.format("Next MarkovEstimate for txn #%d\n%s",
                      state.getTransactionId(), estimate.toString()));
        assert(estimate.isInitialized()) :
            String.format("Unexpected uninitialized MarkovEstimate for txn #%d\n%s", state.getTransactionId(), estimate);
        assert(estimate.isValid()) :
            String.format("Invalid MarkovEstimate for txn #%d\n%s", state.getTransactionId(), estimate);
        
        // Once the workload shifts we detect it and trigger this method. Recomputes
        // the graph with the data we collected with the current workload method.
        if (this.enable_recomputes && markov.shouldRecompute(this.txn_count.get(), RECOMPUTE_TOLERANCE)) {
            markov.calculateProbabilities(catalogContext.getAllPartitionIds());
        }
        
        // We want to add the estimate to the state down here after we have initialized
        // everything. This prevents other threads from accessing it before we have
        // initialized it properly.
        state.addEstimate(estimate);
        if (this.profiler != null) this.profiler.update_time.appendTime(timestamp);
        return (estimate);
    }


    @Override
    protected void completeTransaction(EstimatorState s, Status status) {
        long timestamp = -1l;
        if (this.profiler != null) timestamp = ProfileMeasurement.getTime();
        MarkovEstimatorState state = (MarkovEstimatorState)s;


        // The transaction for the given txn_id is in limbo, so we just want to remove it
        if (status == Status.ABORT_MISPREDICT) {
            state.getMarkovGraph().incrementMispredictionCount();
            if (this.profiler != null) this.profiler.finish_time.appendTime(timestamp);
            return;
        }


        Long txn_id = state.getTransactionId();
        long end_time = EstTime.currentTimeMillis();
        MarkovGraph markov = state.getMarkovGraph();
        if (debug.val)
            LOG.debug(String.format("Cleaning up state info for txn #%d [status=%s]",
                      txn_id, status));


        // If there were no updates while the transaction was running, then
        // we don't want to try to update the model, because we will end up
        // connecting the START vertex to the COMMIT vertex, which is not correct
        if (state.isUpdatesEnabled()) {
            // We need to update the counter information in our MarkovGraph so that we know
            // that the procedure may transition to the ABORT vertex from where ever it was before 
            MarkovVertex current = state.getCurrent();
            assert(current != null) : 
                String.format("Missing current vertex for %s\n%s",
                              TransactionUtil.formatTxnName(markov.getProcedure(), txn_id), state);
            
            // If we don't have the terminal vertex, then we know that we don't care about
            // what this transaction actually did
            MarkovVertex next_v = markov.getFinishVertex(status);
            if (next_v == null) {
                if (this.profiler != null) this.profiler.finish_time.appendTime(timestamp);
                return;
            }
            
            // If no edge exists to the next vertex, then we need to create one
            MarkovEdge next_e = null;
            synchronized (next_v) {
                next_e = markov.addToEdge(current, next_v);
            } // SYNCH
            state.setCurrent(next_v, next_e); // For post-txn processing...
    
            // Update counters
            // We want to update the counters for the entire path right here so that
            // nobody gets incomplete numbers if they recompute probabilities
            for (MarkovVertex v : state.actual_path) v.incrementInstanceHits();
            for (MarkovEdge e : state.actual_path_edges) e.incrementInstanceHits();
            if (this.enable_recomputes) {
                this.markovTimes.addInstanceTime(next_v, txn_id, state.getExecutionTimeOffset(end_time));
            }
        }
        
        // Cache the path for the MarkovGraph if the path was correct for the txn
        if (hstore_conf.site.markov_path_caching && 
            this.cached_paths.containsKey(markov) == false && state.getInitialEstimate().isValid()) {
            MarkovEstimate initialEst = s.getInitialEstimate();
            synchronized (this.cached_paths) {
                if (this.cached_paths.containsKey(markov) == false) {
                    if (debug.val)
                        LOG.debug(String.format("Storing cached path through %s[#%d] that was used by txn #%d",
                                  markov, markov.getGraphId(), txn_id));
                    this.cached_paths.put(markov, initialEst.getMarkovPath());
                }
            } // SYNCH
        } else if (trace.val && hstore_conf.site.markov_path_caching) {
            LOG.trace(String.format("Not caching path through %s[#%d] used by txn #%d [alreadyCached=%s / isValid=%s]",
                      markov, markov.getGraphId(), txn_id,
                      this.cached_paths.containsKey(markov), state.getInitialEstimate().isValid()));
        }
        if (this.profiler != null) this.profiler.finish_time.appendTime(timestamp);
        return;
    }
    
    @Override
    public void destroyEstimatorState(EstimatorState s) {
        this.statesPool.returnObject((MarkovEstimatorState)s);
    }


    // ----------------------------------------------------------------------------
    // INTERNAL ESTIMATION METHODS
    // ----------------------------------------------------------------------------
    
    /**
     * Estimate the execution path of the txn based on its current vertex in the graph
     * The estimate will be stored in the given MarkovEstimate.
     * Note that the path could be empty.
     * @param state The txn's TransactionEstimator state
     * @param est The current TransactionEstimate for the txn
     * @param catalog_proc The Procedure being executed by the txn 
     * @param args Procedure arguments (mangled)
     */
    private void estimatePath(MarkovEstimatorState state, MarkovEstimate est, Procedure catalog_proc, Object args[]) {
        long timestamp = -1l;
        assert(state.isInitialized()) : state.hashCode();
        assert(est.isInitialized()) : state.hashCode();
        if (debug.val)
            LOG.debug(String.format("%s - Estimating execution path (%s)",
                      TransactionUtil.formatTxnName(catalog_proc, state.getTransactionId()),
                      (est.isInitialEstimate() ? "INITIAL" : "BATCH #" + est.getBatchId())));
        
        MarkovVertex currentVertex = est.getVertex();
        assert(currentVertex != null);
        if (this.enable_recomputes) {
            this.markovTimes.addInstanceTime(currentVertex, state.getTransactionId(), EstTime.currentTimeMillis());
        }
      
        // TODO: If the current vertex is in the initial estimate's list,
        // then we can just use the truncated list as the estimate, since we know
        // that the path will be the same. We don't need to recalculate everything
        MarkovGraph markov = state.getMarkovGraph();
        assert(markov != null) :
            String.format("Unexpected null MarkovGraph for %s [hashCode=%d]\n%s",
                          TransactionUtil.formatTxnName(catalog_proc, state.getTransactionId()),
                          state.hashCode(), state);
        boolean compute_path = true;
        if (hstore_conf.site.markov_fast_path && currentVertex.isStartVertex() == false) {
            List<MarkovVertex> initialPath = ((MarkovEstimate)state.getInitialEstimate()).getMarkovPath();
            if (initialPath.contains(currentVertex)) {
                if (debug.val)
                    LOG.debug(String.format("%s - Using fast path estimation for %s[#%d]",
                              TransactionUtil.formatTxnName(catalog_proc, state.getTransactionId()), markov, markov.getGraphId()));
                if (this.profiler != null) timestamp = ProfileMeasurement.getTime();
                try {
                    MarkovPathEstimator.fastEstimation(est, initialPath, currentVertex);
                    compute_path = false;
                } finally {
                    if (this.profiler != null) this.profiler.fastest_time.appendTime(timestamp);
                }
            }
        }
        // We'll reuse the last MarkovPathEstimator (and it's path) if the graph has been accurate for
        // other previous transactions. This prevents us from having to recompute the path every single time,
        // especially for single-partition transactions where the clustered MarkovGraphs are accurate
        else if (hstore_conf.site.markov_path_caching) {
            List<MarkovVertex> cached = this.cached_paths.get(markov);
            if (cached == null) {
                if (debug.val)
                    LOG.debug(String.format("%s - No cached path available for %s[#%d]",
                              TransactionUtil.formatTxnName(catalog_proc, state.getTransactionId()),
                              markov, markov.getGraphId()));
            }
            else if (markov.getAccuracyRatio() < hstore_conf.site.markov_path_caching_threshold) {
                if (debug.val)
                    LOG.debug(String.format("%s - MarkovGraph %s[#%d] accuracy is below caching threshold [%.02f < %.02f]",
                              TransactionUtil.formatTxnName(catalog_proc, state.getTransactionId()),
                              markov, markov.getGraphId(), markov.getAccuracyRatio(),
                              hstore_conf.site.markov_path_caching_threshold));
            }
            else {
                if (debug.val)
                    LOG.debug(String.format("%s - Using cached path for %s[#%d]",
                              TransactionUtil.formatTxnName(catalog_proc, state.getTransactionId()),
                              markov, markov.getGraphId()));
                if (this.profiler != null) timestamp = ProfileMeasurement.getTime();
                try {
                    MarkovPathEstimator.fastEstimation(est, cached, currentVertex);
                    compute_path = false;
                } finally {
                    if (this.profiler != null) this.profiler.cachedest_time.appendTime(timestamp);
                }
            }
        }
        
        // Use the MarkovPathEstimator to estimate a new path for this txn
        if (compute_path) {
            if (debug.val)
                LOG.debug(String.format("%s - Need to compute new path in %s[#%d] using %s",
                          TransactionUtil.formatTxnName(catalog_proc, state.getTransactionId()),
                          markov, markov.getGraphId(),
                          MarkovPathEstimator.class.getSimpleName()));
            MarkovPathEstimator pathEstimator = null;
            try {
                pathEstimator = (MarkovPathEstimator)this.pathEstimatorsPool.borrowObject();
                pathEstimator.init(state.getMarkovGraph(), est, args, state.getBasePartition());
                
                pathEstimator.setForceTraversal(hstore_conf.site.markov_force_traversal);
                pathEstimator.setLearningEnabled(hstore_conf.site.markov_learning_enable);
            } catch (Throwable ex) {
                String txnName = TransactionUtil.formatTxnName(catalog_proc, state.getTransactionId());
                String msg = "Failed to intitialize new MarkovPathEstimator for " + txnName; 
                LOG.error(msg, ex);
                throw new RuntimeException(msg, ex);
            }
            
            if (this.profiler != null) timestamp = ProfileMeasurement.getTime();
            try {
                pathEstimator.traverse(est.getVertex());
            } catch (Throwable ex) {
                try {
                    GraphvizExport<MarkovVertex, MarkovEdge> gv = MarkovUtil.exportGraphviz(markov, true, markov.getPath(pathEstimator.getVisitPath()));
                    LOG.error("GRAPH #" + markov.getGraphId() + " DUMP: " + gv.writeToTempFile(catalog_proc));
                } catch (Exception ex2) {
                    throw new RuntimeException(ex2);
                }
                String msg = "Failed to estimate path for " + TransactionUtil.formatTxnName(catalog_proc, state.getTransactionId());
                LOG.error(msg, ex);
                throw new RuntimeException(msg, ex);
            } finally {
                if (this.profiler != null) this.profiler.fullest_time.appendTime(timestamp);
            }
            
            // If our path was incomplete or we created new vertices during the traversal,
            // then we should tell the PartitionExecutor that we need updates about this
            // txn so that we can populate the MarkovGraph
            if (hstore_conf.site.markov_learning_enable && est.isInitialEstimate()) {
                Collection<MarkovVertex> createdVertices = pathEstimator.getCreatedVertices();
                MarkovVertex v = CollectionUtil.last(est.getMarkovPath()); 
                if ((createdVertices != null && createdVertices.isEmpty() == false) ||
                     (v.isQueryVertex() == true || v.isStartVertex())) {
                    if (debug.val)
                        LOG.debug(String.format("Enabling runtime updates for %s " +
                              "[createdVertices=%s, lastVertex=%s]",
                              state.getTransactionId(), createdVertices, v));
                    state.shouldAllowUpdates(true);
                }
            }
            
            this.pathEstimatorsPool.returnObject(pathEstimator);
        }
    }
    
    /**
     * Figure out the next vertex that the txn will transition to for the given Statement catalog object
     * and the partitions that it will touch when it is executed. If none of the vertices reachable from
     * the state's current vertex match, we will create a new QUERY vertex and dynamically add it
     * (and an edge to it) to our MarkovGraph. The state's current vertex/edge is then advanced.
     * @param state the estimator state for the txn; supplies the current vertex, the touched partitions,
     *              and the transaction id
     * @param markov the MarkovGraph to search and, if necessary, extend (asserted non-null)
     * @param catalog_stmt the Statement that is being executed
     * @param partitions the partitions that the Statement will touch
     * @param queryCounter the instance counter of this Statement within the txn. If negative, the
     *                     counter is obtained from state.updateQueryInstanceCount()
     * @return the vertex that the txn has transitioned to
     * @throws ServerFaultException if the transition would go from the START vertex directly to a COMMIT vertex
     */
    private MarkovVertex consume(MarkovEstimatorState state,
                                 MarkovGraph markov,
                                 Statement catalog_stmt,
                                 PartitionSet partitions,
                                 int queryCounter) {
        // Only grab a timestamp if profiling is turned on
        long timestamp = -1l;
        if (this.profiler != null) timestamp = ProfileMeasurement.getTime();
        
        // Update the number of times that we have executed this query in the txn
        // (only when the caller did not already provide a counter)
        if (queryCounter < 0) queryCounter = state.updateQueryInstanceCount(catalog_stmt);
        assert(markov != null);
        
        // Examine all of the vertices that are adjacent to our current vertex
        // and see which vertex we are going to move to next
        PartitionSet touchedPartitions = state.getTouchedPartitions();
        MarkovVertex current = state.getCurrent();
        assert(current != null);
        MarkovVertex next_v = null;
        MarkovEdge next_e = null;
        
        // Synchronize on the single vertex so that it's more fine-grained than the entire graph
        synchronized (current) {
            Collection<MarkovEdge> edges = markov.getOutEdges(current);
            if (edges != null) {
                if (debug.val)
                    LOG.debug(String.format("Examining %d edges from %s for txn #%d",
                              edges.size(), current, state.getTransactionId())); 
                // Take the first out-edge whose destination matches this statement invocation
                for (MarkovEdge e : edges) {
                    MarkovVertex v = markov.getDest(e);
                    if (v.isEqual(catalog_stmt, partitions, touchedPartitions, queryCounter)) {
                        if (debug.val)
                            LOG.debug("Found next vertex " + v + " for Txn #" + state.getTransactionId());
                        next_v = v;
                        next_e = e;
                        break;
                    }
                } // FOR
            }
        
            // If we fail to find the next vertex, that means we have to dynamically create a new 
            // one. The graph is self-managed, so we don't need to worry about whether 
            // we need to recompute probabilities.
            if (next_v == null) {
                next_v = new MarkovVertex(catalog_stmt,
                                          MarkovVertex.Type.QUERY,
                                          queryCounter,
                                          partitions,
                                          touchedPartitions);
                assert(markov.containsVertex(current)) :
                    String.format("%s does not have current vertex %s for %s",
                                  markov, current,
                                  TransactionUtil.formatTxnName(markov.getProcedure(), state.getTransactionId())); 
                // The new vertex is added inside the lock on 'current' together with its edge
                markov.addVertex(next_v);
                next_e = markov.addToEdge(current, next_v);
                if (debug.val)
                    LOG.debug(String.format("Created new edge from %s to new vertex %s for txn #%d", 
                              state.getCurrent(), next_v, state.getTransactionId()));
                // assert(state.getCurrent().getPartitions().size() <= touchedPartitions.size());
            }
        } // SYNCH
        
        // Sanity check: a txn is never allowed to go straight from START to COMMIT here
        if (current.isStartVertex() && next_v.isCommitVertex()) {
            throw new ServerFaultException("Trying to connect START->COMMIT", state.getTransactionId());
        }


        // Update the counters and other info for the next vertex and edge
        // (the instance time is only recorded when recomputes are enabled)
        if (this.enable_recomputes) {
            this.markovTimes.addInstanceTime(next_v, state.getTransactionId(), state.getExecutionTimeOffset());
        }
        
        // Update the state information
        state.setCurrent(next_v, next_e);
        if (debug.val)
            LOG.debug("Updated State Information for txn #" + state.getTransactionId() + 
                      (trace.val ? "\n" + state : ""));
        // Record the elapsed time for this call if profiling is turned on
        if (this.profiler != null) this.profiler.consume_time.appendTime(timestamp);
        return (next_v);
    }


    // ----------------------------------------------------------------------------
    // HELPER METHODS
    // ----------------------------------------------------------------------------
    
    public MarkovEstimatorState processTransactionTrace(TransactionTrace txn_trace) throws Exception {
        Long txn_id = txn_trace.getTransactionId();
        if (debug.val) {
            LOG.debug("Processing TransactionTrace #" + txn_id);
            if (trace.val)
                LOG.trace(txn_trace.debug(this.catalogContext.database));
        }
        MarkovEstimatorState s = (MarkovEstimatorState)this.startTransaction(txn_id,
                                               txn_trace.getCatalogItem(this.catalogContext.database),
                                               txn_trace.getParams());
        assert(s != null) : "Null EstimatorState for txn #" + txn_id;
        
        for (Entry<Integer, List<QueryTrace>> e : txn_trace.getBatches().entrySet()) {
            int batch_size = e.getValue().size();
            if (trace.val) LOG.trace(String.format("Batch #%d: %d traces", e.getKey(), batch_size));
            
            // Generate the data structures we will need to give to the TransactionEstimator
            Statement catalog_stmts[] = new Statement[batch_size];
            PartitionSet partitions[] = new PartitionSet[batch_size];
            this.populateQueryBatch(e.getValue(), s.getBasePartition(), catalog_stmts, partitions);
        
            synchronized (s.getMarkovGraph()) {
                this.executeQueries(s, catalog_stmts, partitions);
            } // SYNCH
        } // FOR (batches)
        if (txn_trace.isAborted()) {
            this.abort(s, Status.ABORT_USER);
        } else {
            this.commit(s);
        }
        
        assert(s.getEstimateCount() == txn_trace.getBatchCount()) :
            String.format("EstimateCount[%d] != BatchCount[%d]",
                          s.getEstimateCount(), txn_trace.getBatchCount());
        assert(s.actual_path.size() == (txn_trace.getQueryCount() + 2)) :
            String.format("Path[%d] != QueryCount[%d]",
                          s.actual_path.size(), txn_trace.getQueryCount());
        return (s);
    }
    
    private boolean populateQueryBatch(List<QueryTrace> queries, int base_partition, Statement catalog_stmts[], PartitionSet partitions[]) throws Exception {
        int i = 0;
        boolean readOnly = true;
        for (QueryTrace query_trace : queries) {
            assert(query_trace != null);
            catalog_stmts[i] = query_trace.getCatalogItem(catalogContext.database);
            partitions[i] = new PartitionSet();
            this.p_estimator.getAllPartitions(partitions[i], query_trace, base_partition);
            assert(partitions[i].isEmpty() == false) : "No partitions for " + query_trace;
            readOnly = readOnly && catalog_stmts[i].getReadonly();
            i++;
        } // FOR
        return (readOnly);
    }
    
    protected Pair<MarkovEdge, MarkovVertex> getCachedBatchEnd(MarkovVertex start, Statement catalog_stmt, int idx, PartitionSet partitions, PartitionSet past_partitions) {
        Map<MultiKey<String>, Pair<MarkovEdge, MarkovVertex>> m = this.cache_batchEnd.get(start);
        Pair<MarkovEdge, MarkovVertex> found = null;
        if (m != null) {
            MultiKey<String> cache_key = new MultiKey<String>(CatalogKey.createKey(catalog_stmt),
                                                              Integer.toString(idx),
                                                              partitions.toString(),
                                                              past_partitions.toString());
            found = m.get(cache_key);
        }
        return (found);
    }
    
    protected void addCachedBatchEnd(MarkovVertex start, MarkovEdge e, MarkovVertex v,
                                     Statement catalog_stmt, int idx, PartitionSet partitions, PartitionSet past_partitions) {
        ConcurrentHashMap<MultiKey<String>, Pair<MarkovEdge, MarkovVertex>> m = cache_batchEnd.get(start);
        if (m == null) {
            synchronized (this.cache_batchEnd) {
                m = this.cache_batchEnd.get(start);
                if (m == null) {
                    m = new ConcurrentHashMap<MultiKey<String>, Pair<MarkovEdge, MarkovVertex>>();
                    this.cache_batchEnd.put(start, m);
                }
            } // SYNCH
        }
        MultiKey<String> cache_key = new MultiKey<String>(CatalogKey.createKey(catalog_stmt),
                                                          Integer.toString(idx),
                                                          partitions.toString(),
                                                          past_partitions.toString());
        m.putIfAbsent(cache_key, Pair.of(e, v));
    }
    
    // ----------------------------------------------------------------------------
    // DEBUG METHODS
    // ----------------------------------------------------------------------------
    
    public class Debug implements DebugContext {
    
        public TypedObjectPool<MarkovPathEstimator> getPathEstimatorsPool() {
            return (pathEstimatorsPool);
        }
        
        public MarkovEstimatorProfiler getProfiler() {
            return (profiler);
        }
    } // CLASS
    
    private MarkovEstimator.Debug cachedDebugContext;
    public MarkovEstimator.Debug getDebugContext() {
        if (cachedDebugContext == null) {
            cachedDebugContext = new MarkovEstimator.Debug();
        }
        return cachedDebugContext;
    }


}
Source Code of edu.brown.hstore.estimators.markov.MarkovEstimator

Related Classes of edu.brown.hstore.estimators.markov.MarkovEstimator