Package edu.umd.cloud9.io.pair

Examples of edu.umd.cloud9.io.pair.PairOfIntFloat


  // int numberOfTopics, int numberOfTerms, boolean approximateBeta) throws IOException {
  public static HMapIV<double[]> importBeta(SequenceFile.Reader sequenceFileReader,
      int numberOfTopics, int numberOfTerms) throws IOException {
    HMapIV<double[]> beta = new HMapIV<double[]>();

    PairOfIntFloat pairOfIntFloat = new PairOfIntFloat();

    HMapIDW hashMap = new HMapIDW();
    // HashMap hashMap = new HashMap();

    // ProbDist hashMap = null;
    // if (!approximateBeta) {
    // hashMap = new HashMap();
    // } else {
    // hashMap = new BloomMap();
    // }

    while (sequenceFileReader.next(pairOfIntFloat, hashMap)) {
      Preconditions.checkArgument(
          pairOfIntFloat.getLeftElement() > 0 && pairOfIntFloat.getLeftElement() <= numberOfTopics,
          "Invalid beta vector for term " + pairOfIntFloat.getLeftElement() + "...");

      // topic is from 1 to K
      int topicIndex = pairOfIntFloat.getLeftElement() - 1;
      double logNormalizer = pairOfIntFloat.getRightElement();
      // double logNormalizer = Math.log(pairOfIntFloat.getRightElement());
      // double logNormalizer = Math.log(hashMap.getNormalizeFactor());

      // logNormalizer = LogMath.add(pairOfIntFloat.getRightElement(),
      // Settings.DEFAULT_LOG_ETA + Math.log(numberOfTerms));
View Full Code Here


      Map<Integer, String> termIndex = new HashMap<Integer, String>();
      sequenceFileReader = new SequenceFile.Reader(fs, indexPath, conf);
      while (sequenceFileReader.next(intWritable, text)) {
        termIndex.put(intWritable.get(), text.toString());
      }
      PairOfIntFloat pairOfIntFloat = new PairOfIntFloat();
      // HMapIFW hmap = new HMapIFW();
      HMapIDW hmap = new HMapIDW();
      TreeMap<Double, Integer> treeMap = new TreeMap<Double, Integer>();
      sequenceFileReader = new SequenceFile.Reader(fs, betaPath, conf);
      while (sequenceFileReader.next(pairOfIntFloat, hmap)) {
        treeMap.clear();

        System.out.println("==============================");
        System.out.println("Top ranked " + topDisplay + " terms for Topic "
            + pairOfIntFloat.getLeftElement());
        System.out.println("==============================");

        Iterator<Integer> itr1 = hmap.keySet().iterator();
        int temp1 = 0;
        while (itr1.hasNext()) {
View Full Code Here

                String term = m2.group(1);
                if (!term.equals("NULL")) {
                  float prob = Float.parseFloat(m2.group(2));
                  int engIndex = trgVocab.addOrGet(term);
                  logger.debug("Added: "+term+" with index: "+engIndex+" and prob:"+prob);
                  indexProbPairs.add(new PairOfIntFloat(engIndex, prob));
                  sumOfProbs+=prob;
                }
              }
            }
            if(sumOfProbs > probThreshold){
View Full Code Here

                String term = m2.group(1);
                if ( !term.equals("NULL") ) {
                  float prob = Float.parseFloat(m2.group(2));
                  int engIndex = trgVocab.addOrGet(term);
                  logger.debug("Added: "+term+" with index: "+engIndex+" and prob:"+prob);
                  indexProbPairs.add(new PairOfIntFloat(engIndex, prob));
                  sumOfProbs += prob;
                }
              }
            }
            // if number of translations not set, we never cut-off, so all cases are long tails
View Full Code Here

                String term = m2.group(1);
                if (!term.equals("NULL")) {
                  float prob = Float.parseFloat(m2.group(2));
                  int engIndex = trgVocab.addOrGet(term);
                  logger.debug("Added: "+term+" with index: "+engIndex+" and prob:"+prob);
                  indexProbPairs.add(new PairOfIntFloat(engIndex, prob));
                  sumOfProbs+=prob;
                }
              }
            }
            if(sumOfProbs > probThreshold){
View Full Code Here

                String term = m2.group(1);
                if ( !term.equals("NULL") ) {
                  float prob = Float.parseFloat(m2.group(2));
                  int engIndex = trgVocab.addOrGet(term);
                  logger.debug("Added: "+term+" with index: "+engIndex+" and prob:"+prob);
                  indexProbPairs.add(new PairOfIntFloat(engIndex, prob));
                  sumOfProbs += prob;
                }
              }
            }
            // if number of translations not set, we never cut-off, so all cases are long tails
View Full Code Here

                String term = m2.group(1);
                if ( !term.equals("NULL") ) {
                  float prob = Float.parseFloat(m2.group(2));
                  int engIndex = trgVocab.addOrGet(term);
                  logger.debug("Added: "+term+" with index: "+engIndex+" and prob:"+prob);
                  indexProbPairs.add(new PairOfIntFloat(engIndex, prob));
                  sumOfProbs += prob;
                }
              }
            }
            // if number of translations not set, we never cut-off, so all cases are long tails
View Full Code Here

    @Override
    public void reduce(Text key, Iterable<PairOfIntLong> values, Context context)
        throws IOException, InterruptedException {
      String term = key.toString();
      Iterator<PairOfIntLong> iter = values.iterator();
      PairOfIntLong p = iter.next();
      int df = p.getLeftElement();
      long cf = p.getRightElement();
      WritableUtils.writeVInt(dfByTermOut, df);
      WritableUtils.writeVLong(cfByTermOut, cf);
      if (iter.hasNext()) {
        throw new RuntimeException("More than one record for term: " + term);
      }
View Full Code Here

      // map from the id back to text
      // sLogger.info("termid: " + key);
      String term = mTermIdMap.getTerm(key.get());
      // sLogger.info("term: " + term);
      PairOfIntLong pair = gs.getStats(term);

      if (pair == null) {
        p.setCf(-1);
        p.setDf(-1);
      } else {
        p.setCf(pair.getRightElement());
        p.setDf(pair.getLeftElement());
      }

      output.collect(key, p);
    }
View Full Code Here

      float sumProb2 = 0;
      for (Entry<String> entry : probDist.entrySet()) {
        float pr = entry.getValue() / sumProb;
        if (pr > lexProbThreshold) {
          sumProb2 += pr;
          sortedFilteredProbDist.add(new PairOfStringFloat(entry.getKey(), pr));
        }
      }

      // re-normalize values after removal of low-prob entries
      float cumProb = 0;
      int cnt = 0;
      while (cnt < maxNumTrans && cumProb < cumProbThreshold && !sortedFilteredProbDist.isEmpty()) {
        PairOfStringFloat entry = sortedFilteredProbDist.pollLast();
        float pr = entry.getValue() / sumProb2;
        cumProb += pr;
        normProbDist.put(entry.getKey(), pr);
        cnt++;
      }

      probMap.put(sourceTerm, normProbDist);
    }
View Full Code Here

TOP

Related Classes of edu.umd.cloud9.io.pair.PairOfIntFloat

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.