Examples of readCollectionDocumentCount()


Examples of ivory.core.RetrievalEnvironment.readCollectionDocumentCount()

      } catch (IOException e) {
        throw new RuntimeException("Unable to create RetrievalEnvironment!");
      }

      String forwardIndexPath = env.getIntDocVectorsForwardIndex();
      collectionDocumentCount = env.readCollectionDocumentCount();

      try {
        out = fs.create(new Path(forwardIndexPath), true);
        out.writeInt(env.readDocnoOffset());
        out.writeInt(collectionDocumentCount);
View Full Code Here

Examples of ivory.core.RetrievalEnvironment.readCollectionDocumentCount()

        env = new RetrievalEnvironment(conf.get(Constants.IndexPath), fs);
      } catch (IOException e) {
        throw new RuntimeException("Unable to create RetrievalEnvironment!");
      }

      collectionDocumentCount = env.readCollectionDocumentCount();

      try {
        out = fs.create(new Path(env.getTermDocVectorsForwardIndex()), true);
        out.writeInt(env.readDocnoOffset());
        out.writeInt(collectionDocumentCount);
View Full Code Here

Examples of ivory.core.RetrievalEnvironment.readCollectionDocumentCount()

    String collectionName = env.readCollectionName();

    int reduceTasks = conf.getInt(Constants.NumReduceTasks, 0);
    int minSplitSize = conf.getInt(Constants.MinSplitSize, 0);
    int collectionDocCount = env.readCollectionDocumentCount();

    String postingsType = conf.get(Constants.PostingsListsType,
        PostingsListDocSortedPositional.class.getCanonicalName());
    @SuppressWarnings("unchecked")
    Class<? extends PostingsList> postingsClass =
View Full Code Here

Examples of ivory.core.RetrievalEnvironment.readCollectionDocumentCount()

      LOG.info("reading stats for " + index);

      RetrievalEnvironment env = new RetrievalEnvironment(index, fs);

      long l = env.readCollectionLength();
      int n = env.readCollectionDocumentCount();

      LOG.info(" - CollectionLength: " + l);
      LOG.info(" - CollectionDocumentCount: " + n);

      collectionLength += l;
View Full Code Here

Examples of ivory.core.RetrievalEnvironment.readCollectionDocumentCount()

    }

    // distribute global stats
    RetrievalEnvironment genv = new RetrievalEnvironment(statsPath, fs);
    long collectionLength = genv.readCollectionLength();
    int docCount = genv.readCollectionDocumentCount();
    float avgdl = genv.readCollectionAverageDocumentLength();

    sLogger.info("writing global stats from all index segments: ");
    sLogger.info(" - CollectionLength: " + collectionLength);
    sLogger.info(" - CollectionDocumentCount: " + docCount);
View Full Code Here

Examples of ivory.core.RetrievalEnvironment.readCollectionDocumentCount()

    fScoreFn = (ScoringModel) new Bm25();
    fScoreFn.setAvgDocLength(lang2AvgSentLen.get(fLang));        

    // We use the df table of the English side, so the collection doc count must be read from the English dir.
    RetrievalEnvironment eEnv = new RetrievalEnvironment(eDir, fs);
    fScoreFn.setDocCount(eEnv.readCollectionDocumentCount());  

    classifier = new MoreGenericModelReader(pathMapping.get(modelFileName), localFs).constructModel();
  }

  private void loadEModels(JobConf conf) throws Exception {
View Full Code Here

Examples of ivory.core.RetrievalEnvironment.readCollectionDocumentCount()

    eTok = TokenizerFactory.createTokenizer(fs, eLang, tokenizerFile, null);
    sLogger.info("Tokenizer and vocabs created successfully.");

    eScoreFn = (ScoringModel) new Bm25();
    eScoreFn.setAvgDocLength(lang2AvgSentLen.get(eLang));        //average sentence length = heuristic based on De-En data
    eScoreFn.setDocCount(env.readCollectionDocumentCount());

    dict = new DefaultFrequencySortedDictionary(new Path(env.getIndexTermsData()), new Path(env.getIndexTermIdsData()), new Path(env.getIndexTermIdMappingData()), fs);
    dfTable = new DfTableArray(new Path(env.getDfByTermData()), fs);

    //for backward compatibility
View Full Code Here

Examples of ivory.core.RetrievalEnvironment.readCollectionDocumentCount()

    fScoreFn = (ScoringModel) new Bm25();
    fScoreFn.setAvgDocLength(lang2AvgSentLen.get(fLang));        

    // We use the df table of the English side, so the collection doc count must be read from the English dir.
    RetrievalEnvironment eEnv = new RetrievalEnvironment(eDir, localFs);
    fScoreFn.setDocCount(eEnv.readCollectionDocumentCount());  

    classifier = new MoreGenericModelReader(new Path(conf.get("modelFileName")), localFs).constructModel();
  }

  private void loadEModels(Configuration conf) throws Exception {
View Full Code Here

Examples of ivory.core.RetrievalEnvironment.readCollectionDocumentCount()

    eTok = TokenizerFactory.createTokenizer(localFs, eLang, tokenizerFile, eVocabTrg);
    sLogger.info("Tokenizer and vocabs created successfully.");

    eScoreFn = (ScoringModel) new Bm25();
    eScoreFn.setAvgDocLength(lang2AvgSentLen.get(eLang));        //average sentence length = heuristic based on De-En data
    eScoreFn.setDocCount(env.readCollectionDocumentCount());

    dict = new DefaultFrequencySortedDictionary(new Path(env.getIndexTermsData()), new Path(env.getIndexTermIdsData()), new Path(env.getIndexTermIdMappingData()), localFs);
    dfTable = new DfTableArray(new Path(env.getDfByTermData()), localFs);
  }
View Full Code Here

Examples of ivory.core.RetrievalEnvironment.readCollectionDocumentCount()

    String collectionName = env.readCollectionName();

    int reduceTasks = conf.getInt(Constants.NumReduceTasks, 0);
    int minSplitSize = conf.getInt(Constants.MinSplitSize, 0);
    int collectionDocCount = env.readCollectionDocumentCount();

    String postingsType = conf.get(Constants.PostingsListsType,
        PostingsListDocSortedPositional.class.getCanonicalName());
    @SuppressWarnings("unchecked")
    Class<? extends PostingsList> postingsClass =
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact: coftware#gmail.com.