Examples of HMapII


Examples of edu.umd.cloud9.util.map.HMapII

      }

      // if cache is non-empty, a docnos file has been entered
      if (localFiles != null) {
        sLogger.setLevel(Level.INFO);
        samplesMap = new HMapII();
        try {
          LineReader reader = new LineReader(FileSystem.getLocal(conf).open(localFiles[0]));
          Text t = new Text();
          while (reader.readLine(t) != 0) {
            int docno = Integer.parseInt(t.toString());
View Full Code Here

Examples of edu.umd.cloud9.util.map.HMapII

      }

      // if cache is non-empty, a docnos file has been entered
      if (localFiles != null) {
        sLogger.setLevel(Level.INFO);
        samplesMap = new HMapII();
        try {
          LineReader reader = new LineReader(FileSystem.getLocal(conf).open(localFiles[0]));
          Text t = new Text();
          while (reader.readLine(t) != 0) {
            int docno = Integer.parseInt(t.toString());
View Full Code Here

Examples of edu.umd.cloud9.util.map.HMapII

    //Parse queries and find integer codes for the query terms.
    HMapIV<String> parsedQueries = QueryUtility.loadQueries(queryPath);
    HMapIV<int[]> queries = QueryUtility.queryToIntegerCode(env, parsedQueries);

    Set<Integer> termidHistory = Sets.newHashSet();
    HMapII docLengths = new HMapII();

    SpamPercentileScore spamScores = new SpamPercentileScore();
    spamScores.initialize(spamPath, fs);
    int[] newDocids = DocumentUtility.spamSortDocids(spamScores);

    Posting posting = new Posting();
    List<TermPositions> positions = Lists.newArrayList();
    Map<Integer, TermPositions> positionsMap = Maps.newHashMap();

    for(int qid: queries.keySet()) {
      for(int termid: queries.get(qid)) {
        if(!termidHistory.contains(termid)) {
          termidHistory.add(termid);
          PostingsList pl = env.getPostingsList(env.getTermFromId(termid));
          PostingsReader reader = pl.getPostingsReader();

          positions.clear();
          positionsMap.clear();
          int[] data = new int[pl.getDf()];
          int index = 0;
          while (reader.nextPosting(posting)) {
            data[index] = newDocids[posting.getDocno()];
            positionsMap.put(data[index], new TermPositions(reader.getPositions(), reader.getTf()));
            docLengths.put(data[index], env.getDocumentLength(posting.getDocno()));
            index++;
          }
          Arrays.sort(data);

          for(int i = 0; i < data.length; i++) {
            positions.add(positionsMap.get(data[i]));
          }

          output.writeInt(termid);
          output.writeInt(pl.getDf());
          CompressedPositionalPostings.newInstance(data, positions).write(output);
        }
      }
      LOGGER.info("Compressed query " + qid);
    }

    output.writeInt(-1);

    output.writeInt(docLengths.size());
    for(int docid: docLengths.keySet()) {
      output.writeInt(docid);
      output.writeInt(docLengths.get(docid));
    }

    output.close();
  }
View Full Code Here

Examples of edu.umd.cloud9.util.map.HMapII

          + "1-Path to the Counts Table: A text file consisting of one "
              + "<length-of-anchor-text>\t<number-of-instances-to-sample> record per line.)\n"
                  + "2-Integer: Minimum number of target documents\n"
                      + "3-Integer: Maximum number of target documents");
    }
    counts = new HMapII();
    minNumberTargets = Integer.parseInt(params[1]);
    maxNumberTargets = Integer.parseInt(params[2]);

    try {
      FSDataInputStream in = fs.open(new Path(params[0]));
View Full Code Here

Examples of edu.umd.cloud9.util.map.HMapII

  public void initialize(FileSystem fs, String... params) {
    if (params.length != 1) {
      throw new RuntimeException(toString() + ": Missing counts table (path to a text file consisting of one "
          + "<number-of-target-documents>\t<number-of-instances-to-sample> record per line.)");
    }
    counts = new HMapII();

    try {
      FSDataInputStream in = fs.open(new Path(params[0]));
      String next;
      String[] records;
View Full Code Here

Examples of edu.umd.cloud9.util.map.HMapII

                            (float) env.getDefaultDf(), (float) env.getDefaultCf());
  }

  private void preparePostings(String postingsPath) throws Exception {
    postings = new HMapIV<CompressedPositionalPostings>();
    dfs = new HMapII();
    docLengths = new HMapII();

    FSDataInputStream input = fs.open(new Path(postingsPath));
    int termid = input.readInt();
    while(termid != -1) {
      dfs.put(termid, input.readInt());
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.