Package ivory.smrf.model.constrained

Source Code of ivory.smrf.model.constrained.GreedyConstrainedMRFBuilder

/*
* Ivory: A Hadoop toolkit for web-scale information retrieval
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package ivory.smrf.model.constrained;

import ivory.core.RetrievalEnvironment;
import ivory.core.exception.ConfigurationException;
import ivory.core.exception.RetrievalException;
import ivory.core.util.XMLTools;
import ivory.smrf.model.Clique;
import ivory.smrf.model.MarkovRandomField;
import ivory.smrf.model.importance.LinearImportanceModel;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.w3c.dom.Node;

import edu.umd.cloud9.util.map.HMapKF;

/**
* @author Lidan Wang
*/
public class GreedyConstrainedMRFBuilder extends ConstrainedMRFBuilder {

  // document frequencies, from which costs will be computed
  private HMapKF<String> dfs;

  // model style (either Indep or Joint)
  private String modelType;

  // bin multiple
  private float qlMultiple;

  // basic thresholds
  private float unigramAddThreshold;
  private float bigramAddThreshold;

  // redundancy thresholds
  private float unigramRedundThreshold;
  private float bigramRedundThreshold;

  // beta value
  private float beta;

  public GreedyConstrainedMRFBuilder(RetrievalEnvironment env, Node model)
      throws ConfigurationException, IOException {
    super(env, model);

    // model type
    modelType = XMLTools.getAttributeValue(model, "style", null);
    if (modelType == null || (!"Indep".equals(modelType) && !"Joint".equals(modelType))) {
      throw new RetrievalException(
          "Error: GreedyConstrainedMRFBuilder requires a model type attribute of Indep or Joint!");
    }

    // query likelihood
    qlMultiple = XMLTools.getAttributeValue(model, "qlMultiple", -1.0f);

    // unigram and bigram basic thresholds
    unigramAddThreshold = XMLTools.getAttributeValue(model, "unigramAddThreshold", -1.0f);
    bigramAddThreshold = XMLTools.getAttributeValue(model, "bigramAddThreshold", -1.0f);

    // unigram and bigram redundancy thresholds
    unigramRedundThreshold = XMLTools.getAttributeValue(model, "unigramRedundThreshold", -1.0f);
    bigramRedundThreshold = XMLTools.getAttributeValue(model, "bigramRedundThreshold", -1.0f);

    // beta value
    beta = XMLTools.getAttributeValue(model, "beta", -1.0f);

    if ("Indep".equals(modelType) && (qlMultiple == -1 || unigramAddThreshold == -1)) {
      throw new RetrievalException(
          "Error: Indep model must specify valid qlMultiple, unigramAddThreshold, and bigramAddThreshold attributes!");
    }

    if ("Joint".equals(modelType) &&
         (qlMultiple == -1 || unigramAddThreshold == -1 || bigramAddThreshold == -1
            || unigramRedundThreshold == -1 || bigramRedundThreshold == -1 || beta == -1)) {
      throw new RetrievalException(
          "Error: Joint model must specify valid qlMultiple, unigramAddThreshold, bigramAddThreshold, unigramRedundThreshold, bigramRedundThreshold, and beta attributes!");
    }

    String file = XMLTools.getAttributeValue(model, "file", null);
    if (file == null) {
      throw new RetrievalException(
          "Error: GreedyConstrainedMRFBuilder requires a file attribute specifying the location of the document frequencies!");
    }

    // Read document frequencies.
    dfs = LinearImportanceModel.readDataStats(file);
  }

  @Override
  protected MarkovRandomField buildConstrainedMRF(String[] queryTerms, MarkovRandomField mrf) {
    List<Clique> cliques = mrf.getCliques();

    float qlCost = 0.0f;
    Set<String> seenTerms = new HashSet<String>();

    int numQueryTerms = queryTerms.length;

    // generate constrained cliques
    List<ConstrainedClique> constrainedCliques = new ArrayList<ConstrainedClique>();
    for (Clique c : cliques) {
      // type of clique
      Clique.Type cliqueType = c.getType();

      // terms associated with clique
      String cliqueTerms = c.getConcept();

      ConstrainedClique newClique = new ConstrainedClique(c);

      // get+set analytical cost
      float analyticalCost = getCost(cliqueTerms);
      newClique.setAnalyticalCost(analyticalCost);

      // get+set profit density
      float profitDensity = c.getWeight() / analyticalCost;
      newClique.setProfitDensity(profitDensity);

      if (cliqueType.equals(Clique.Type.Term)) {
        if (!(seenTerms.contains(cliqueTerms))) {
          qlCost += analyticalCost;
          seenTerms.add(cliqueTerms);
        }
      }

      constrainedCliques.add(newClique);
    }

    float binConstraint = qlMultiple * qlCost;

    List<ConstrainedClique> selectedCliques = null;
    if ("Indep".equals(modelType) || numQueryTerms == 1) {
      selectedCliques = ConstraintModel.greedyKnapsack(constrainedCliques, binConstraint,
          unigramAddThreshold, bigramAddThreshold);
    } else if ("Joint".equals(modelType)) {
      selectedCliques = ConstraintModel.greedyJoint(constrainedCliques, binConstraint,
          unigramAddThreshold, bigramAddThreshold, unigramRedundThreshold,
          bigramRedundThreshold, beta);
    }

    // construct constrained mrf
    MarkovRandomField constrainedMRF = new MarkovRandomField(queryTerms, env);
    for (Clique c : selectedCliques) {
      constrainedMRF.addClique(c);
    }

    return constrainedMRF;
  }

  private float getCost(String cliqueTerms) {
    float r = 0;

    String[] terms = cliqueTerms.trim().split("\\s+");

    for (int k = 0; k < terms.length; k++) {

      if (dfs.containsKey(terms[k])) {
        r += Math.log(dfs.get(terms[k]) + 1.01);
        // Lidan: add 0.01 in case df = 1, log(1) = 0.
      } else {
        r += Math.log(1.01);
      }
    }

    return r;
  }
}
TOP

Related Classes of ivory.smrf.model.constrained.GreedyConstrainedMRFBuilder

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.