Package edu.umd.cloud9.io.map

Examples of edu.umd.cloud9.io.map.HMapIFW


   *     mapping from F-terms to their df values
   * @return
   *     mapping from E-terms to their computed df values
   */
  public static HMapIFW translateDFTable(Vocab eVocabSrc, Vocab fVocabTrg, TTable_monolithic_IFAs e2f_probs, HMapSIW dfs){
    HMapIFW transDfTable = new HMapIFW();
    for(int e=1;e<eVocabSrc.size();e++){
      int[] fS = null;
      try {
        fS = e2f_probs.get(e).getTranslations(0.0f);
      } catch (Exception e1) {
        e1.printStackTrace();
      }
      float df=0;
      for(int f : fS){
        float probEF = e2f_probs.get(e, f);
        String fTerm = fVocabTrg.get(f);
        if(!dfs.containsKey(fTerm)){  //only if word is in the collection, can it contribute to the df values.
          continue;
        }     
        float df_f = dfs.get(fTerm);
        df+=(probEF*df_f);
      }
      transDfTable.put(e, df);
    }
    return transDfTable;
  }
View Full Code Here


  // Document length; initialised to 0 by the no-arg constructor.
  private int docLength;
  // Int-keyed map of float values (presumably term id -> weight; confirm against callers).
  private HMapIFW weightedTerms;

  public WeightedIntDocVector () {
    docLength = 0;
    weightedTerms = new HMapIFW ();
  }
View Full Code Here

    weightedTerms.write (out);
  }

  public void readFields (DataInput in) throws IOException {
    docLength = WritableUtils.readVInt (in);
    weightedTerms = new HMapIFW ();
    weightedTerms.readFields (in);
  }
View Full Code Here

   *     FileSystem object
   * @return
   *     mapping from term ids to df values
   */
  public static HMapIFW readTransDfTable(Path path, FileSystem fs) {
    HMapIFW transDfTable = new HMapIFW();
    try {
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, fs.getConf());

      IntWritable key = (IntWritable) reader.getKeyClass().newInstance();
      FloatWritable value = (FloatWritable) reader.getValueClass().newInstance();

      while (reader.next(key, value)) {
        transDfTable.put(key.get(), value.get());

        key = (IntWritable) reader.getKeyClass().newInstance();
        value = (FloatWritable) reader.getValueClass().newInstance();
      }
      reader.close();
View Full Code Here

   *     contains mapping from F-terms to their df values
   * @return
   *     mapping from E-terms to their computed df values
   */
  public static HMapIFW translateDFTable(Vocab eVocabSrc, Vocab fVocabTrg, TTable_monolithic_IFAs e2f_probs, PrefixEncodedGlobalStats globalStatsMap){
    HMapIFW transDfTable = new HMapIFW();
    for(int e=1;e<eVocabSrc.size();e++){
      int[] fS = e2f_probs.get(e).getTranslations(0.0f);
      float df=0;
      for(int f : fS){
        float probEF = e2f_probs.get(e, f);
        String fTerm = fVocabTrg.get(f);
        float df_f = globalStatsMap.getDF(fTerm);

        df+=(probEF*df_f);
      }
      transDfTable.put(e, df);
    }
    return transDfTable;
  }
View Full Code Here

   *     mapping from F-terms to their df values
   * @return
   *     mapping from E-terms to their computed df values
   */
  public static HMapIFW translateDFTable(Vocab eVocabSrc, Vocab fVocabTrg, TTable_monolithic_IFAs e2f_probs, HMapSIW dfs){
    HMapIFW transDfTable = new HMapIFW();
    for(int e=1;e<eVocabSrc.size();e++){
      int[] fS = null;
      try {
        fS = e2f_probs.get(e).getTranslations(0.0f);
      } catch (Exception e1) {
        e1.printStackTrace();
      }
      float df=0;
      for(int f : fS){
        float probEF = e2f_probs.get(e, f);
        String fTerm = fVocabTrg.get(f);
        if(!dfs.containsKey(fTerm)){  //only if word is in the collection, can it contribute to the df values.
          continue;
        }     
        float df_f = dfs.get(fTerm);
        df+=(probEF*df_f);
      }
      transDfTable.put(e, df);
    }
    return transDfTable;
  }
View Full Code Here

      if(!language.equals("english")){
        docno.set(docno.get() + 1000000000)//to distinguish between the two collections in the PWSim sliding window algorithm
      }

      //translate doc vector   
      HMapIFW tfS = new HMapIFW();
     
      int docLen = CLIRUtils.translateTFs(doc, tfS, eVocabSrc, eVocabTrg, fVocabSrc, fVocabTrg, e2f_Probs, f2e_Probs, LOG);
      HMapSFW v = CLIRUtils.createTermDocVector(docLen, tfS, eVocabSrc, model, transDfTable, isNormalize, LOG);
     
      // if no translation of any word is in the target vocab, remove document i.e., our model wasn't capable of translating it.
View Full Code Here

     
      PrefixEncodedGlobalStats globalStatsMap;
      globalStatsMap = new PrefixEncodedGlobalStats(new Path(termsFile), fs2);
      globalStatsMap.loadDFStats(new Path(dfByTermFile), fs2);

      HMapIFW transDfTable = CLIRUtils.translateDFTable(eVocab_e2f, fVocab_e2f, en2DeProbs, globalStatsMap);

      SequenceFile.Writer writer = SequenceFile.createWriter(fs2, conf, new Path(transDfFile), IntWritable.class, FloatWritable.class);
      for(MapIF.Entry term : transDfTable.entrySet()){
        reporter.incrCounter(DF.TransDf, 1);
        writer.append(new IntWritable(term.getKey()), new FloatWritable(term.getValue()));
      }
      writer.close();
    }
View Full Code Here

    }

    public void map(IntWritable docno, WeightedIntDocVector docvectorIn,
        OutputCollector<IntWritable, MinhashSignature> output,
        Reporter reporter) throws IOException {
      HMapIFW docvector = docvectorIn.getWeightedTerms();
      signature.clear();

      for(int i=0;i<randomOrderings.size();i++){
        int minTerm = getMinHashTerm(docvector, (ArrayListOfIntsWritable) randomOrderings.get(i));
        signature.add(minTerm);
View Full Code Here

    }

    public void map(IntWritable docno, WeightedIntDocVector docvectorIn,
        OutputCollector<IntWritable, NBitSignature> output,
        Reporter reporter) throws IOException {
      HMapIFW docvector = docvectorIn.getWeightedTerms();
      FloatAsBytesWritable value;

      for(int i=0;i<randomUnitVectors.size();i++){
        value = (FloatAsBytesWritable) randomUnitVectors.get(i);
        double dprod = dotProduct(docvector,value);
View Full Code Here

TOP

Related Classes of edu.umd.cloud9.io.map.HMapIFW

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.