Package: edu.umd.cloud9.io.array

Usage examples of edu.umd.cloud9.io.array.ArrayListOfIntsWritable


      // Number of PageRank-mass messages emitted by this call (fragment: the
      // enclosing method and the loop/if closing braces are truncated in this view).
      int massMessages = 0;

      // Distribute PageRank mass to neighbors (along outgoing edges).
      // NOTE(review): "getAdjacenyList" is a misspelling of "getAdjacencyList" in the
      // project API; it cannot be renamed here without breaking other callers.
      if (node.getAdjacenyList().size() > 0) {
        // Each neighbor gets an equal share of PageRank mass.
        ArrayListOfIntsWritable list = node.getAdjacenyList();
        // PageRank mass is kept in log space here: dividing the node's mass evenly
        // over out-degree becomes subtracting log(out-degree).
        float mass = node.getPageRank() - (float) StrictMath.log(list.size());

        // Iterate over neighbors.
        for (int i = 0; i < list.size(); i++) {
          // neighbor / intermediateMass are presumably reusable Writable fields of the
          // mapper (declared outside this fragment) — reused to avoid per-message allocation.
          neighbor.set(list.get(i));
          intermediateMass.set(mass);

          // Emit messages with PageRank mass to neighbors.
          context.write(neighbor, intermediateMass);
          massMessages++;
View Full Code Here


      // Number of messages avoided by combining in-mapper instead of emitting
      // (fragment: enclosing method and closing braces truncated in this view).
      int massMessagesSaved = 0;

      // Distribute PageRank mass to neighbors (along outgoing edges).
      if (node.getAdjacenyList().size() > 0) {
        // Each neighbor gets an equal share of PageRank mass.
        ArrayListOfIntsWritable list = node.getAdjacenyList();
        // Mass is in log space: equal division over out-degree is a log subtraction,
        // and per-neighbor contributions are combined below with sumLogProbs().
        float mass = node.getPageRank() - (float) StrictMath.log(list.size());

        // Iterate over neighbors.
        for (int i = 0; i < list.size(); i++) {
          int neighbor = list.get(i);

          if (map.containsKey(neighbor)) {
            // Already message destined for that node; add PageRank mass contribution.
            // This buffering in `map` is the in-mapper combining pattern: one emitted
            // message per distinct destination instead of one per edge.
            massMessagesSaved++;
            map.put(neighbor, sumLogProbs(map.get(neighbor), mass));
View Full Code Here

    /**
     * Parses one adjacency-list text line ("nodeId neighbor1 neighbor2 ...") into a
     * HITSNode hub record, dropping any ids found in {@code stopList}.
     * (Fragment: truncated before the emit/closing brace; {@code keyOut} and
     * {@code stopList} are mapper fields declared outside this view.)
     */
    public void map(LongWritable key, Text value,
        OutputCollector<IntWritable, HITSNode> output, Reporter reporter)
        throws IOException {

      HITSNode dataOut = new HITSNode();
      ArrayListOfIntsWritable links = new ArrayListOfIntsWritable();
      dataOut.setType(HITSNode.TYPE_HUB_COMPLETE);

      String line = ((Text) value).toString();
      StringTokenizer itr = new StringTokenizer(line);

      // First token is this node's id; skip the whole record if it is stop-listed.
      if (itr.hasMoreTokens()) {
        int curr = Integer.parseInt(itr.nextToken());
        if (stopList.contains(curr)) {
          return;
        }
        keyOut.set(curr);
        dataOut.setNodeId(keyOut.get());
      }
      // NOTE(review): if a line is blank, keyOut silently keeps the value from a
      // previous map() call — confirm input never contains empty lines.
      // Remaining tokens are outlinks; keep only those not in the stop list.
      while (itr.hasMoreTokens()) {
        // links = new ArrayListOfIntsWritable();
        int curr = Integer.parseInt(itr.nextToken());
        if (!(stopList.contains(curr))) {
          links.add(curr);
        }
      }
      dataOut.setOutlinks(links);
      dataOut.setHRank((float) 0.0);
      // NOTE(review): debug print to stdout on every record; consider a logger at
      // debug level instead.
      System.out.println(">>>" + keyOut.get() + " | " + dataOut.toString());
View Full Code Here

      // Reducer body fragment (method signature and emit are outside this view):
      // merges the outlink lists of all HITSNode values for one key into a single
      // de-duplicated hub record. adjList / valOut / valIn are reducer fields.
      adjList.clear();
      // adjList.trimToSize();

      while (values.hasNext()) {
        valIn = values.next();
        ArrayListOfIntsWritable adjListIn = valIn.getOutlinks();
        // trimToSize() before getArray() — presumably so the backing array holds
        // exactly size() elements with no spare capacity; confirm against the
        // ArrayListOfIntsWritable implementation.
        adjListIn.trimToSize();
        // addUnique() merges the incoming ids, skipping ones already present.
        adjList.addUnique(adjListIn.getArray());
        valOut.setNodeId(valIn.getNodeId());
      }
      valOut.setOutlinks(adjList);
      valOut.setType(HITSNode.TYPE_HUB_COMPLETE);
      valOut.setHRank((float) 0.0);
View Full Code Here

    // Tail of a readFields(DataInput) implementation (its opening is outside this
    // view). Complete-type records additionally carry a serialized PageRank float.
    if (type.equals(Type.Complete)) {
      pagerank = in.readFloat();
    }

    // The adjacency list is always present and deserializes itself from the stream.
    adjacenyList = new ArrayListOfIntsWritable();
    adjacenyList.readFields(in);
  }
View Full Code Here

  /**
   * Deserializes this HITSNode from the stream: type byte, node id, then a payload
   * that depends on the type. (Fragment: truncated before the branch that reads
   * link lists for complete node types.)
   */
  public void readFields(DataInput in) throws IOException {
    mType = in.readByte();

    mNodeId = in.readInt();

    // Fresh empty lists regardless of type, so the fields are never stale/null.
    mInlinks = new ArrayListOfIntsWritable();
    mOutlinks = new ArrayListOfIntsWritable();

    // Mass-only records carry just a rank value; stop before reading link lists.
    if (mType == TYPE_HUB_MASS || mType == TYPE_NODE_MASS) {
      mHRank = in.readFloat();
      return;
    }
View Full Code Here

      // Fragment: the opening of this method's signature (name and leading
      // parameters, including bufferedReader) is above the visible region.
      SequenceFile.Reader sequenceFileReader, SequenceFile.Writer sequenceFileWriter)
      throws IOException {
    // Term string -> integer id, loaded from the sequence file.
    Map<String, Integer> termIndex = ParseCorpus.importParameter(sequenceFileReader);

    IntWritable intWritable = new IntWritable();
    ArrayListOfIntsWritable arrayListOfIntsWritable = new ArrayListOfIntsWritable();

    StringTokenizer stk = null;
    String temp = null;

    // Convert each text line into the list of term ids it contains; records are
    // keyed by the line's 1-based position in the file.
    String line = bufferedReader.readLine();
    int index = 0;
    while (line != null) {
      index++;
      intWritable.set(index);
      // Reuse the same Writable objects across lines to avoid per-line allocation.
      arrayListOfIntsWritable.clear();

      stk = new StringTokenizer(line);
      while (stk.hasMoreTokens()) {
        temp = stk.nextToken();
        if (termIndex.containsKey(temp)) {
          arrayListOfIntsWritable.add(termIndex.get(temp));
        } else {
          // Out-of-vocabulary term: log it and drop it from the output record.
          sLogger.info("How embarrassing! Term " + temp + " not found in the index file...");
        }
      }
View Full Code Here

  /**
   * Reads eta priors from a sequence file into a map from topic index (1..K) to
   * the set of term ids carrying a prior for that topic. (Fragment: the loop tail
   * and return statement are truncated in this view.)
   */
  public static HMapIV<Set<Integer>> importEta(SequenceFile.Reader sequenceFileReader)
      throws IOException {
    HMapIV<Set<Integer>> lambdaMap = new HMapIV<Set<Integer>>();

    // Reusable key/value buffers for SequenceFile.Reader.next().
    IntWritable intWritable = new IntWritable();
    ArrayListOfIntsWritable arrayListOfInts = new ArrayListOfIntsWritable();

    while (sequenceFileReader.next(intWritable, arrayListOfInts)) {
      // Keys are 1-based topic indices; reject anything non-positive.
      // NOTE(review): the message says "term" but the key here is a topic index.
      Preconditions.checkArgument(intWritable.get() > 0, "Invalid eta prior for term "
          + intWritable.get() + "...");

      // topic is from 1 to K
      int topicIndex = intWritable.get();
      Set<Integer> hashset = new HashSet<Integer>();

      // Copy the writable int list into a plain HashSet (the buffer is reused
      // on the next iteration, so the contents must be copied out).
      Iterator<Integer> itr = arrayListOfInts.iterator();
      while (itr.hasNext()) {
        hashset.add(itr.next());
      }

      lambdaMap.put(topicIndex, hashset);
View Full Code Here

          // Fragment (mid-method): builds pwsimMapping, an index from docno to the
          // list of similar docnos on the other language side, from (PairOfInts,
          // IntWritable) records read by a SequenceFile reader declared above.
          int fDocno = key.getRightElement();
          // Foreign docnos were offset by 1000000000 during the pwsim algorithm
          // to keep the two collections' id spaces disjoint; remove the offset.
          fDocno -= 1000000000;
          int eDocno = key.getLeftElement();
          if(lang.equals("en")){
            // English side: index similar foreign docnos under the English docno.
            if(!pwsimMapping.containsKey(eDocno)){
              pwsimMapping.put(eDocno, new ArrayListOfIntsWritable());
            }
            pwsimMapping.get(eDocno).add(fDocno);    // we add 1000000000 to foreign docnos to distinguish them during pwsim algo
          }else{
            // Foreign side: index similar English docnos under the foreign docno.
            if(!pwsimMapping.containsKey(fDocno)){
              pwsimMapping.put(fDocno, new ArrayListOfIntsWritable());
            }
            pwsimMapping.get(fDocno).add(eDocno);    // we add 1000000000 to foreign docnos to distinguish them during pwsim algo
          }
          // Fresh key/value instances for the reader's next record (presumably the
          // surrounding loop calls reader.next(key, value) — not visible here).
          key = (PairOfInts) reader.getKeyClass().newInstance();
          value = (IntWritable) reader.getValueClass().newInstance();
View Full Code Here

    /**
     * Emits sentence data for Wikipedia pages that have cross-language similar
     * documents, for downstream parallel-sentence extraction. (Fragment: truncated
     * inside the try block; pwsimMapping, mJob and sLogger are mapper fields.)
     */
    public void map(Writable docnoKey, Indexable page, OutputCollector<PairOfInts, WikiDocInfo> output, Reporter reporter) throws IOException {
      int docno = ((IntWritable)docnoKey).get();
      WikipediaPage p = (WikipediaPage) page;
      String lang = p.getLanguage();
      ArrayListOfIntsWritable similarDocnos;

      // we only load the mapping once, during the first map() call of a mapper.
      // this works b/c all input kv pairs of a given mapper will have same lang id (reason explained above)
      if(pwsimMapping.isEmpty()){
        loadPairs(pwsimMapping, lang, mJob, reporter);
        sLogger.debug(pwsimMapping.size());
      }

      // if no similar docs for docno, return
      if(pwsimMapping.containsKey(docno)){
        similarDocnos = pwsimMapping.get(docno);
      }else{
        return;
      }

      // Per-document accumulators: sentences, their tf-idf vectors, and lengths.
      ArrayListWritable<Text> sentences;
      ArrayListWritable<HMapSFW> vectors = new ArrayListWritable<HMapSFW>();
      ArrayListOfIntsWritable sentLengths = new ArrayListOfIntsWritable();
      try {
        if(lang.equals("en")){
          // identify sentences in document, filter out ones below MinSentLength threshold
          // convert each sentence into a tf-idf vector, using general DF map for collection and a heuristic for avg. doc length
          // filter out sentences for which the vector has less than MinVectorTerms terms
View Full Code Here

TOP

Related Classes of edu.umd.cloud9.io.array.ArrayListOfIntsWritable

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle, Inc. Contact coftware#gmail.com.