Package sample.gbase.recipe

Source Code of sample.gbase.recipe.MostUsedValues

/* Copyright (c) 2006 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package sample.gbase.recipe;

import com.google.api.gbase.client.AttributeHistogram;
import com.google.api.gbase.client.FeedURLFactory;
import com.google.api.gbase.client.GoogleBaseEntry;
import com.google.api.gbase.client.GoogleBaseFeed;
import com.google.api.gbase.client.GoogleBaseQuery;
import com.google.api.gbase.client.GoogleBaseService;
import com.google.api.gbase.client.MetadataEntryExtension;
import com.google.api.gbase.client.AttributeHistogram.UniqueValue;
import com.google.gdata.util.ServiceException;

import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;
import java.util.Timer;
import java.util.TimerTask;

import javax.servlet.ServletContext;

/**
* Holds the most used values of attributes in Google Base and
* periodically refreshes them.
*
* A MostUsedValues object is focused on a GoogleBaseService and
* a FeedURLFactory. One object can be used to analyse only the items that
* match a specific query.
*
* The class currently works for TEXT attributes only.
*
* The cache() and clear() calls are synchronized,
* so that the different Maps and Collections remain in sync.
*
* Typically, right after you create a MostUsedValues object, you call the
* {@link #cache(long, int, ServletContext, String[])} method to set it up
* to cache the most used values of your attributes. This creates
* {@link java.util.TimerTask} objects that periodically refresh the cached
* values. After this, you use {@link #getMostUsedValuesForAttribute(String)}
* to get the most used values for the attributes you previously specified.
* In the end, when you no longer need the cache, you can call
* {@link #clear()} to stop the TimerTask objects and clear the cache.
*/
public class MostUsedValues {

  protected static final String TEXT_TYPE = "(text)";

  /**
   * The initial value of the max values limit when making a query;
   * the step used to increase that limit if some of the attributes
   * are not found.
   */
  protected static final int STEP_MAXRESULTS = 25;

  /**
   * The max of the max values limit when making a query.
   */
  protected static final int MAX_MAXRESULTS = 200;

  /**
   * The cached most used values.
   * This is a synchronized map.
   */
  private java.util.Map<String, String[]> mostUsedValues;
 
  /**
   * The timers that periodically refresh the cache.
   * The access to this object has to be synchronized.
   */
  private Collection<Timer> timers;

  private GoogleBaseService service;
  private FeedURLFactory urlFactory;
  private String queryString;
 
  /**
   * Creates an empty MostUsedValues.
   *
   * @param service any GoogleBaseService used to retrieve attribute histograms
   * @param urlFactory a FeedURLFactory used to create the URLs of
   *                   the attribute histograms
   * @param queryString the query string used to filter the analyzed items,
   *                    for example one that focuses only on the items that
   *                    have a specific item type.
   */
  public MostUsedValues(GoogleBaseService service,
                        FeedURLFactory urlFactory,
                        String queryString) {
    this.service = service;
    this.urlFactory = urlFactory;
    this.queryString = queryString;
   
    mostUsedValues = new Hashtable<String, String[]>();
    timers = new ArrayList<Timer>();
  }

  /**
   * Gets the cached most used values of an attribute.
   *
   * @param attrName the name of the attribute
   * @return a cached list of the most used values
   */
  public String[] getMostUsedValuesForAttribute(String attrName) {
    return mostUsedValues.get(attrName);
  }
 
  /**
   * Sets up the object to cache a limited number of the most used values of
   * each of the specified attributes; the cache is refreshed periodically.
   *
   * This method does not check if the specified attributes are already cached.
   * You have to make sure an attribute is specified only once in your calls.
   *
   * @param interval the cache refresh period, in millis
   * @param maxValues the maximum number of values to cache
   * @param servletContext a ServletContext to be used for logging
   * @param attrNames the names of the attributes to be cached
   */
  synchronized public void cache(long interval,
                                 final int maxValues,
                                 ServletContext servletContext,
                                 final String... attrNames) {
    if (attrNames.length == 0) {
      return;
    }

    Timer timer = new Timer(true);
    TimerTask task = createRefresher(maxValues, servletContext, attrNames);
    task.run();
    timer.schedule(task, interval, interval);
    timers.add(timer);
  }

  /**
   * Creates a TimerTask that refreshes the cached most used values of
   * the specified attributes.
   *
   * @param maxValues how many values to cache for each attribute
   * @param servletContext servlet context for logging error messages
   * @param attrNames the names of the attributes
   * @return a TimerTask that refreshes the cache
   */
  private TimerTask createRefresher(final int maxValues,
                                    final ServletContext servletContext,
                                    final String... attrNames) {
    TimerTask task = new TimerTask() {
      /**
       * Tells the MostUsedValues object that created this TimerTask to
       * refresh the cached most used values of some of the attributes.
       */
      @Override public void run() {
        try {
          MostUsedValues.this.retrieveMostUsedValues(maxValues, attrNames);
        } catch (IOException e) {
          servletContext.log(e.getMessage(), e);
        } catch (ServiceException e) {
          servletContext.log(e.getMessage() + " " +
                             e.getHttpErrorCodeOverride() + " " +
                             e.getResponseContentType() + ": " +
                             e.getResponseBody(), e);
        }
      }
    };
    return task;
  }

  /**
   * Retrieves the most used values for some attributes
   * for the items that match the query string
   * and stores a limited number of those values for each attribute.
   * 
   * @param numValue maximum number of values to store
   * @param attrNames the names of the attributes
   */
  protected void retrieveMostUsedValues(int numValue, final String... attrNames)
      throws ServiceException, IOException {
    URL url = urlFactory.getAttributesFeedURL();
    GoogleBaseQuery query = new GoogleBaseQuery(url);
   
    StringBuffer queryString = createQueryString(attrNames);
    query.setGoogleBaseQuery(queryString.toString());
    query.setMaxValues(numValue);

    int numResults = 0;
    int lastNumResults = 0;
    Collection<String> attrToRetrieve =
        new ArrayList<String>(Arrays.asList(attrNames));
    do {
      // Get the feed
      numResults += STEP_MAXRESULTS;
      query.setMaxResults(numResults);
      GoogleBaseFeed feed = service.query(query);
      if (lastNumResults == feed.getTotalResults()) {
        // No new entries to process
        break;
      }
      lastNumResults = feed.getTotalResults();
      // Extract the values from the entries
      Iterator<String> attrIter = attrToRetrieve.iterator();
      while (attrIter.hasNext()) {
        String attrName = attrIter.next();
        // Searching for the entry with the following name
        String entryTitle = attrName + TEXT_TYPE;
        for (GoogleBaseEntry entry : feed.getEntries()) {
          if (entryTitle.equals(entry.getTitle().getPlainText())) {
            extractValuesFromEntry(numValue, attrName, entry);
            attrIter.remove();
          }
        }
      }
    } while (!attrToRetrieve.isEmpty() && numResults <= MAX_MAXRESULTS);
   
    if (!attrToRetrieve.isEmpty()) {
      throw new ServiceException("The retrieved histograms do not contain" +
          "some of the attributes. The most used values of these attributes " +
          "have not been refreshed.");
    }
  }

  /**
   * Returns the query string extended so that it filters out the items
   * that do not have at least one of the specified attributes of type TEXT.
   *
   * @param attrNames the attributes we are interested in
   * @return an extended query string
   */
  protected StringBuffer createQueryString(final String... attrNames) {
    StringBuffer queryString = new StringBuffer(this.queryString);
    queryString.append("(");
    queryString.append("[").append(attrNames[0]).append(TEXT_TYPE).append("]");
    for (int i = 1; i < attrNames.length; i++) {
      String attrName = attrNames[i];
      queryString.append("|[").append(attrName).append(TEXT_TYPE).append("]");
    }
    queryString.append(")");
    return queryString;
  }

  /**
   * Caches a limited number of the values of a GoogleBaseEntry.
   *
   * @param numValue maximum number of values to cache
   * @param attrName the name of the attribute that has the values
   * @param entry an entry with a MetadataEntryExtension
   */
  private void extractValuesFromEntry(int numValue,
                                      String attrName,
                                      GoogleBaseEntry entry) {
    MetadataEntryExtension metadata = entry.getGoogleBaseMetadata();
    AttributeHistogram attributeHistogram = metadata.getAttributeHistogram();
    List<? extends UniqueValue> values = attributeHistogram.getValues();
   
    int valuesCount = Math.min(numValue, values.size());
    String[] usedValues = new String[valuesCount];
    for (int i = 0; i < valuesCount; i++) {
      usedValues[i] = values.get(i).getValueAsString();
    }
   
    updateMostUsedValue(attrName, usedValues);
  }

  /**
   * Cancels all refresh timers and clears the cache.
   */
  synchronized public void clear() {
    for (Timer timer : timers) {
      timer.cancel();
    }
    timers.clear();
    mostUsedValues.clear();
  }

  /**
   * Returns the number of cached attributes.
   */
  public int size() {
    return mostUsedValues.size();
  }

  /**
   * Returns true if no attribute is cached.
   */
  public boolean isEmpty() {
    return mostUsedValues.isEmpty();
  }

  public String getQueryString() {
    return queryString;
  }
 
  /**
   * Caches the most used values of an attribute.
   * @param attrName
   * @param stringValues
   */
  protected void updateMostUsedValue(String attrName, String[] stringValues) {
    mostUsedValues.put(attrName, stringValues);
  }
}
TOP

Related Classes of sample.gbase.recipe.MostUsedValues

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.