Package ivory.core

Examples of ivory.core.RetrievalEnvironment.readCollectionDocumentCount()


    String collectionName = env.readCollectionName();

    int reduceTasks = conf.getInt(Constants.NumReduceTasks, 0);
    int minSplitSize = conf.getInt(Constants.MinSplitSize, 0);
    int collectionDocCnt = env.readCollectionDocumentCount();
    int maxHeap = conf.getInt(Constants.MaxHeap, 2048);

    String postingsType = conf.get(Constants.PostingsListsType,
        PostingsListDocSortedPositional.class.getCanonicalName());
    @SuppressWarnings("unchecked")
View Full Code Here


      mDLTable = new DocLengthTable4B(env.getDoclengthsData(), fs);
    } catch (IOException e1) {
      throw new RuntimeException("Error initializing Doclengths file");
    }
    LOG.info(mDLTable.getAvgDocLength()+" is average source-language document length.");
    LOG.info(targetEnv.readCollectionDocumentCount()+" is number of target-language docs. We use the target-side DF table so we set #docs to this value in our scoring model.");

    /////// Configuration setup

    conf.set(Constants.IndexPath, indexPath);
    conf.set("Ivory.ScoringModel", scoringModel);
View Full Code Here

    /////// Configuration setup

    conf.set(Constants.IndexPath, indexPath);
    conf.set("Ivory.ScoringModel", scoringModel);
    conf.setFloat("Ivory.AvgDocLen", mDLTable.getAvgDocLength());
    conf.setInt(Constants.CollectionDocumentCount, targetEnv.readCollectionDocumentCount());
    conf.set(Constants.Language, getConf().get("Ivory.Lang"));
    conf.set("Ivory.Normalize", getConf().get("Ivory.Normalize"));
    conf.set("Ivory.MinNumTerms", getConf().get("Ivory.MinNumTerms"));

    conf.setNumMapTasks(300);     
View Full Code Here

    FileSystem fs = FileSystem.get(job2);

    String indexPath = getConf().get("Ivory.IndexPath");
    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    int blockSize = getConf().getInt("Ivory.BlockSize", 0);
    int numDocs = env.readCollectionDocumentCount();
    int numBlocks = numDocs / blockSize + 1;

    String inputPath = null;
    for (int i = 0; i < numBlocks; i++) {
      inputPath = conf.get("Ivory.PCPOutputPath") + "/block" + i; // one block of output of PCP
View Full Code Here

        env = new RetrievalEnvironment(conf.get(Constants.IndexPath), fs);
      } catch (IOException e) {
        throw new RuntimeException("Unable to create RetrievalEnvironment!");
      }

      collectionDocumentCount = env.readCollectionDocumentCount();

      try {
        out = fs.create(new Path(env.getTermDocVectorsForwardIndex()), true);
        out.writeInt(env.readDocnoOffset());
        out.writeInt(collectionDocumentCount);
View Full Code Here

      } catch (IOException e) {
        throw new RuntimeException("Unable to create RetrievalEnvironment!");
      }

      String forwardIndexPath = env.getIntDocVectorsForwardIndex();
      collectionDocumentCount = env.readCollectionDocumentCount();

      try {
        out = fs.create(new Path(forwardIndexPath), true);
        out.writeInt(env.readDocnoOffset());
        out.writeInt(collectionDocumentCount);
View Full Code Here

      int finalNumDocs = weightedIntVectorsTool.run();

      LOG.info("Job BuildTargetLangWeightedIntDocVectors finished in " +
          (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
      if (finalNumDocs > 0) {
        LOG.info("Changed doc count: " + env.readCollectionDocumentCount() +" => " + finalNumDocs);
        env.writeCollectionDocumentCount(finalNumDocs);
      }else {
        LOG.info("No document output! Terminating...");
        return -1;
      }
View Full Code Here

      int finalNumDocs = weightedIntVectorsTool.run();

      LOG.info("Job BuildTargetLangWeightedIntDocVectors finished in " +
          (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
      if (finalNumDocs > 0) {
        LOG.info("Changed doc count: " + env.readCollectionDocumentCount() +" => " + finalNumDocs);
        env.writeCollectionDocumentCount(finalNumDocs);
      }else {
        LOG.info("No document output! Terminating...");
        return -1;
      }
View Full Code Here

      mDLTable = new DocLengthTable4B(env.getDoclengthsData(), fs);
    } catch (IOException e1) {
      throw new RuntimeException("Error initializing Doclengths file");
    }
    LOG.info(mDLTable.getAvgDocLength()+" is average source-language document length.");
    LOG.info(targetEnv.readCollectionDocumentCount()+" is number of target-language docs. We use the target-side DF table so we set #docs to this value in our scoring model.");

    /////// Configuration setup

    conf.set(Constants.IndexPath, indexPath);
    conf.set("Ivory.ScoringModel", scoringModel);
View Full Code Here

    /////// Configuration setup

    conf.set(Constants.IndexPath, indexPath);
    conf.set("Ivory.ScoringModel", scoringModel);
    conf.setFloat("Ivory.AvgDocLen", mDLTable.getAvgDocLength());
    conf.setInt(Constants.CollectionDocumentCount, targetEnv.readCollectionDocumentCount());
    conf.set(Constants.Language, getConf().get("Ivory.Lang"));
    conf.set("Ivory.Normalize", getConf().get("Ivory.Normalize"));
    conf.set("Ivory.MinNumTerms", getConf().get("Ivory.MinNumTerms"));

    conf.setNumMapTasks(300);     
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.