Package it.unimi.di.big.mg4j.document

Examples of it.unimi.di.big.mg4j.document.Document


          throws IOException {
        Long2LongRBTreeMap relFreq = new Long2LongRBTreeMap();
        relFreq.defaultReturnValue(0);

        for (int i = 0; i < min(k, results.size()); i++) {
          Document document = collection
              .document(results.get(i).document);
          Reader reader = (Reader) document.content(fieldIndex);
          WordReader wordReader = document.wordReader(fieldIndex);
          wordReader.setReader(reader);

          MutableString word = new MutableString();
          MutableString nonWord = new MutableString();
          final LongRBTreeSet set = new LongRBTreeSet();

          while (wordReader.next(word, nonWord)) {
            if (processor.processTerm(word)) {
              long termId = index.getTermId(word);
              if (termId >= 0)
                if (set.add(termId))
                  relFreq
                      .put(termId,
                          relFreq.get(termId) + 1);
            }
          }

          document.close();

        }
        return relFreq;
      }
View Full Code Here


     */
    private Multiset<Long> readDocument(int[] contents, long document) throws IOException {
        MutableString separator = new MutableString();
        MutableString token = new MutableString();

        final Document doc = collection.document(document);
        long unknown = index.getUnknownTermId();

        Multiset<Long> words = HashMultiset.create();

        for (int contentId : contents) {
            final WordReader reader = doc.wordReader(0);

            // Loop over terms
            while (reader.next(token, separator)) {
                final Long termId = index.getTermId(token);
                if (termId == unknown) continue;
View Full Code Here

      totalRetrieved += retrieved;
      logger.info(String.format("Returned %d results", retrieved));
      int added = 0;
      for (int i = 0; i < retrieved && added < capacity; i++) {
        DocumentScoreInfo dsi = results.get(i);
        Document document = collection.document(dsi.document);
                System.err.println("URI: " + document.uri());
                System.err.println("URI["+dsi.document+"]: " + collection.metadata(dsi.document).get(PropertyBasedDocumentFactory.MetadataKeys.URI));
        final String docno = (String) collection.metadata(dsi.document).get(PropertyBasedDocumentFactory.MetadataKeys.URI);

        // If it was not a discarded document
        if (discardedDocuments == null
            || !discardedDocuments.contains(docno)) {
          output.format("%s Q0 %s %d %g %s%n", topicId, docno, i,
              dsi.score, runId);
          added++;
        }
        document.close();
      }
      task.progress();
    }
    return 0;
  }
View Full Code Here

TOP

Related Classes of it.unimi.di.big.mg4j.document.Document

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.