Examples of HMapIFW

edu.umd.cloud9.io.map.HMapIFW

Writable representing a map where keys are ints and values are floats.

One notable feature of this class is the ability to support lazy decoding, controlled by the {@link #setLazyDecodeFlag(boolean)} method. In lazy decoding mode, when an object of this type is deserialized, key-value pairs are not inserted into the map, but rather held in arrays. This reduces memory used in cases where random access to values is not required. In lazy decoding mode, the raw keys and values may be fetched by the {@link #getKeys()} and {@link #getValues()} methods, respectively. The map can be subsequently populated with the {@link #decode()} method.
@author Jimmy Lin

Examples of edu.umd.cloud9.io.map.HMapIFW

      return s;
    }


    public void map(IntWritable docno, WeightedIntDocVector docvectorIn,
        OutputCollector<IntWritable, NBitSignature> output, Reporter reporter) throws IOException {
      HMapIFW docvector = docvectorIn.getWeightedTerms();
      FloatAsBytesWritable value;


      for (int i = 0; i < randomUnitVectors.size(); i++) {
        value = (FloatAsBytesWritable) randomUnitVectors.get(i);
        double dprod = dotProduct(docvector, value);

View Full Code Here

Examples of edu.umd.cloud9.io.map.HMapIFW

      }else if (numTerms < MIN_SIZE) {
        reporter.incrCounter(Docs.SHORT, 1);
        return;
      }


      HMapIFW tfS = new HMapIFW();
      // We simply use the source-language doc length since the ratio of doc length to average doc
      // length is unlikely to change significantly (not worth complicating the pipeline)
      int docLen = CLIRUtils.translateTFs(doc, tfS, eVocabSrc, eVocabTrg, fVocabSrc, fVocabTrg,
          e2f_Probs, f2e_Probs, tokenizer, LOG);

View Full Code Here

Examples of edu.umd.cloud9.io.map.HMapIFW

      }  


      DefaultFrequencySortedDictionary dict = new DefaultFrequencySortedDictionary(new Path(env.getIndexTermsData()), new Path(env.getIndexTermIdsData()), new Path(env.getIndexTermIdMappingData()), fs2);
      DfTableArray dfTable = new DfTableArray(new Path(dfByIntFile), fs2);


      HMapIFW transDfTable = CLIRUtils.translateDFTable(eVocab_e2f, fVocab_e2f, en2DeProbs, dict, dfTable);


      SequenceFile.Writer writer = SequenceFile.createWriter(fs2, conf, new Path(transDfFile), IntWritable.class, FloatWritable.class);
      for(MapIF.Entry term : transDfTable.entrySet()){
        reporter.incrCounter(DF.TransDf, 1);
        writer.append(new IntWritable(term.getKey()), new FloatWritable(term.getValue()));
      }
      writer.close();
    }

View Full Code Here

Examples of edu.umd.cloud9.io.map.HMapIFW

    }


    public void map(IntWritable docno, WeightedIntDocVector docvectorIn,
        OutputCollector<IntWritable, MinhashSignature> output, Reporter reporter)
    throws IOException {
      HMapIFW docvector = docvectorIn.getWeightedTerms();
      signature.clear();


      for (int i = 0; i < randomOrderings.size(); i++) {
        int minTerm = getMinHashTerm(docvector, (ArrayListOfIntsWritable) randomOrderings.get(i));
        signature.add(minTerm);

View Full Code Here

Examples of edu.umd.cloud9.io.map.HMapIFW

    for(String term : terms){
      term2Tf.increment(term);
    }


    //translated tf values
    HMapIFW transTermTf = new HMapIFW();
    for(Entry<String> entry : term2Tf.entrySet()){
      String fTerm = entry.getKey();
      int tf = entry.getValue();
      // transTermTf won't be updated if fTerm not in vocab
      transTermTf = CLIRUtils.updateTFsByTerm(fTerm, tf, transTermTf, eVocabSrc, eVocabTrg, fVocabSrc, fVocabTrg, e2f_Probs, f2e_Probs, eTok, sLogger);

View Full Code Here

Examples of edu.umd.cloud9.io.map.HMapIFW

    Tokenizer tokenizer = TokenizerFactory.createTokenizer(eLang, 
        eTokenizerModelFile, true, eStopwordsFile, eStopwordsFile + ".stemmed", null);


    // translate doc texts here
    for (HMapSIW deDoc : docs) {
      HMapIFW tfS = new HMapIFW();
      int docLen = 0;
      try {
        docLen = CLIRUtils.translateTFs(deDoc, tfS, eVocabSrc, eVocabTrg, fVocabSrc,
            fVocabTrg, e2f_Probs, f2e_Probs, tokenizer , null);   // tokenizer just for stopword list
      } catch (IOException e) {

View Full Code Here

Examples of edu.umd.cloud9.io.map.HMapIFW

      if (numTerms < MIN_SIZE) {
        reporter.incrCounter(Docs.SHORT, 1);
        return;
      }


      HMapIFW tfS = new HMapIFW();
      // We simply use the source-language doc length since the ratio of doc length to average doc
      // length is unlikely to change significantly (not worth complicating the pipeline)
      int docLen = CLIRUtils.translateTFs(doc, tfS, eVocabSrc, eVocabTrg, fVocabSrc, fVocabTrg,
          e2f_Probs, f2e_Probs, tokenizer, LOG);

View Full Code Here

Examples of edu.umd.cloud9.io.map.HMapIFW

      }  


      DefaultFrequencySortedDictionary dict = new DefaultFrequencySortedDictionary(new Path(env.getIndexTermsData()), new Path(env.getIndexTermIdsData()), new Path(env.getIndexTermIdMappingData()), fs2);
      DfTableArray dfTable = new DfTableArray(new Path(dfByIntFile), fs2);


      HMapIFW transDfTable = CLIRUtils.translateDFTable(eVocab_e2f, fVocab_e2f, en2DeProbs, dict, dfTable);


      SequenceFile.Writer writer = SequenceFile.createWriter(fs2, conf, new Path(transDfFile), IntWritable.class, FloatWritable.class);
      for(MapIF.Entry term : transDfTable.entrySet()){
        reporter.incrCounter(DF.TransDf, 1);
        writer.append(new IntWritable(term.getKey()), new FloatWritable(term.getValue()));
      }
      writer.close();
    }

View Full Code Here

Examples of edu.umd.cloud9.io.map.HMapIFW

   *     FileSystem object
   * @return
   *     mapping from term ids to df values
   */
  public static HMapIFW readTransDfTable(Path path, FileSystem fs) {
    HMapIFW transDfTable = new HMapIFW();
    try {
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, fs.getConf());


      IntWritable key = (IntWritable) reader.getKeyClass().newInstance();
      FloatWritable value = (FloatWritable) reader.getValueClass().newInstance();


      while (reader.next(key, value)) {
        transDfTable.put(key.get(), value.get());
        //        logger.info(key.get()+"-->"+value.get());
        key = (IntWritable) reader.getKeyClass().newInstance();
        value = (FloatWritable) reader.getValueClass().newInstance();
      }
      reader.close();

View Full Code Here

Examples of edu.umd.cloud9.io.map.HMapIFW

   *     ttable E-->F (i.e., Pr(f|e))
   * @return
   *     mapping from E-terms to their computed df values
   */
  public static HMapIFW translateDFTable(Vocab eVocabSrc, Vocab fVocabTrg, TTable_monolithic_IFAs e2f_probs, FrequencySortedDictionary dict, DfTableArray dfTable){
    HMapIFW transDfTable = new HMapIFW();
    for(int e=1;e<eVocabSrc.size();e++){
      int[] fS = e2f_probs.get(e).getTranslations(0.0f);
      float df=0;
      for(int f : fS){
        float probEF = e2f_probs.get(e, f);
        String fTerm = fVocabTrg.get(f);
        int id = dict.getId(fTerm); 
        if(id != -1){
          float df_f = dfTable.getDf(id);        
          df += (probEF*df_f);
        }else{
          logger.debug(fTerm+" not in dict");
        }
      }
      transDfTable.put(e, df);
    }
    return transDfTable;
  }

View Full Code Here

0 1 2 3 4 5

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.