Package ivory.smrf.model.importance

Source Code of ivory.smrf.model.importance.LinearImportanceModel

/*
* Ivory: A Hadoop toolkit for web-scale information retrieval
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package ivory.smrf.model.importance;

import ivory.core.ConfigurationException;
import ivory.core.RetrievalException;
import ivory.core.util.XMLTools;
import ivory.smrf.model.Clique;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

import edu.umd.cloud9.util.map.HMapKF;

/**
* @author Don Metzler
*/
public class LinearImportanceModel extends ConceptImportanceModel {

  // MetaFeatures.
  private final List<MetaFeature> metafeatures = Lists.newArrayList();

  // MetaFeature values.
  private final Map<MetaFeature, HMapKF<String>> metafeatureValues = Maps.newHashMap();

  // Default feature values for each meta feature.
  private final HMapKF<String> defaultValues = new HMapKF<String>();

  public void configure(Node model) throws ConfigurationException {
    // Clear meta-feature data.
    metafeatures.clear();
    metafeatureValues.clear();
    defaultValues.clear();

    // Construct MRF feature by feature.
    NodeList children = model.getChildNodes();

    float totalMetaFeatureWeight = 0.0f;
    for (int i = 0; i < children.getLength(); i++) {
      Node child = children.item(i);

      if ("feature".equals(child.getNodeName())) {

        // collection_freq, document_freq, clue_cf, or enwiki_cf
        String metaFeatureName = XMLTools.getAttributeValue(child, "id", "");
        float metaFeatureWeight = XMLTools.getAttributeValue(child, "weight", -1.0f);

        if (metaFeatureName == "" || metaFeatureWeight == -1) {
          throw new ConfigurationException("Must specify metafeature name and weight.");
        }

        MetaFeature mf = new MetaFeature(metaFeatureName, metaFeatureWeight);
        metafeatures.add(mf);

        totalMetaFeatureWeight += metaFeatureWeight;

        String file = XMLTools.getAttributeValue(child, "file", null);
        if (file == null) {
          throw new ConfigurationException(
              "Must specify the location of the metafeature stats file.");
        }

        try {
          metafeatureValues.put(mf, readDataStats(file));
        } catch (IOException e) {
          throw new RetrievalException("Error: " + e);
        }

        float defaultValue = XMLTools.getAttributeValue(child, "default", 0.0f);
        defaultValues.put(mf.getName(), defaultValue);
      }
    }

    // Normalize meta feature weights.
    for (int i = 0; i < metafeatures.size(); i++) {
      MetaFeature mf = (MetaFeature) metafeatures.get(i);
      float w = mf.getWeight() / totalMetaFeatureWeight;
      mf.setWeight(w);
    }
  }

  @Override
  public float getConceptWeight(String concept) {
    // Compute query-dependent clique weight.
    float weight = 0.0f;
    for (MetaFeature mf : metafeatures) {
      float metaWeight = mf.getWeight();
      float cliqueFeatureVal = computeFeatureValue(concept, mf);
      weight += metaWeight * cliqueFeatureVal;
    }

    return weight;
  }

  @Override
  public float getCliqueWeight(Clique c) {
    return getConceptWeight(c.getConcept());
  }

  public float computeFeatureValue(String cliqueTerms, MetaFeature f) {
    float count;

    // Get meta-feature values for f.
    HMapKF<String> mfValues = metafeatureValues.get(f);

    // Look up value for clique terms.
    if (mfValues != null && mfValues.containsKey(cliqueTerms)) {
      count = mfValues.get(cliqueTerms);
    } else {
      count = defaultValues.get(f.getName());
    }

    return count;
  }

  public List<MetaFeature> getMetaFeatures() {
    return metafeatures;
  }

  // Reads MetaFeature statistics from a file,
  public static HMapKF<String> readDataStats(String file) throws IOException {
    Configuration conf = new Configuration();
    HMapKF<String> values = new HMapKF<String>();

    FileSystem fs = FileSystem.get(conf);
    BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(file))));

    String line;
    while ((line = in.readLine()) != null) {
      String[] tokens = line.split("\t");

      String concept = tokens[0];
      float value = Float.parseFloat(tokens[1]);

      values.put(concept, value);
    }

    return values;
  }
}
TOP

Related Classes of ivory.smrf.model.importance.LinearImportanceModel

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.