Examples of readCollectionTermCount()


Examples of ivory.core.RetrievalEnvironment.readCollectionTermCount()

    JobConf job = new JobConf(getConf(), WriteRandomVectors.class);
//    job.set("mapred.job.tracker", "local");
//    job.set("fs.default.name", "file:///");
    FileSystem fs = FileSystem.get(job);
    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    K = (int) env.readCollectionTermCount();
    job.setJobName("WriteRandomVectors");
   
    if(D<=0 || K<=0){
      throw new RuntimeException("parameters not read properly");
    }
View Full Code Here

Examples of ivory.core.RetrievalEnvironment.readCollectionTermCount()

        fs.exists(new Path(env.getCfByIntData()))) {
      LOG.info("term and term id data exist: skipping!");
      return 0;
    }

    conf.setInt(Constants.CollectionTermCount, (int) env.readCollectionTermCount());
    conf.set("mapred.child.java.opts", "-Xmx2048m");

    Path tmpPath = new Path(env.getTempDirectory());
    fs.delete(tmpPath, true);
View Full Code Here

Examples of ivory.core.RetrievalEnvironment.readCollectionTermCount()

    JobConf job = new JobConf(conf, ComputeSignaturesMinhash.class);
//    job.set("mapred.job.tracker", "local");
//    job.set("fs.default.name", "file:///");
    FileSystem fs = FileSystem.get(job);
    RetrievalEnvironment env = new RetrievalEnvironment(dir, fs);
    int vocabSize = (int) env.readCollectionTermCount();

    job.setJobName("ComputeSignatures_minhash");//+"_D="+D+"_"+RetrievalEnvironment.readCollectionName(fs, dir));

    String inputPath = PwsimEnvironment.getFileNameWithPars(dir, "IntDocs");
    String outputPath = PwsimEnvironment.getFileNameWithPars(dir, "SignaturesMinhash");
View Full Code Here

Examples of ivory.core.RetrievalEnvironment.readCollectionTermCount()

    // Get CF and DF counts
    startTime = System.currentTimeMillis();
    LOG.info("Counting terms...");
    ComputeGlobalTermStatistics termCountWithDfAndCfTool = new ComputeGlobalTermStatistics(conf);
    termCountWithDfAndCfTool.run();
    LOG.info("TermCount = "+env.readCollectionTermCount()+"\nJob finished in "+(System.currentTimeMillis()-startTime)/1000.0+" seconds");

    // Build a map from terms to sequentially generated integer term ids
    startTime = System.currentTimeMillis();
    conf.setInt("Ivory.TermIndexWindow", TermIndexWindow);
    LOG.info("Building term-to-integer id mapping...");
View Full Code Here

Examples of ivory.core.RetrievalEnvironment.readCollectionTermCount()

      try {
        engVocabH = HadoopAlign.loadVocab(new Path(conf.get("Ivory.FinalVocab")), conf);
      } catch (IOException e) {
        e.printStackTrace();
     
      LOG.info("Changed term count to : "+env.readCollectionTermCount() + " = " + engVocabH.size());
      env.writeCollectionTermCount(engVocabH.size());
    }
   
    LOG.info("Preprocessing job finished in "+(System.currentTimeMillis()-preprocessStartTime)/1000.0+" seconds");
View Full Code Here

Examples of ivory.core.RetrievalEnvironment.readCollectionTermCount()

    String indexPath = getConf().get("Ivory.IndexPath");

    JobConf job = new JobConf(getConf(), WriteRandomVectors.class);
    FileSystem fs = FileSystem.get(job);
    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    K = (int) env.readCollectionTermCount();
    job.setJobName("WriteRandomVectors");

    if (D <= 0 || K <= 0) {
      throw new RuntimeException("parameters not read properly");
    }
View Full Code Here

Examples of ivory.core.RetrievalEnvironment.readCollectionTermCount()

      throw new RuntimeException("Parameters not read properly! Quitting...");
    }
    JobConf job = new JobConf(conf, ComputeSignaturesMinhash.class);
    FileSystem fs = FileSystem.get(job);
    RetrievalEnvironment env = new RetrievalEnvironment(dir, fs);
    int vocabSize = (int) env.readCollectionTermCount();

    job.setJobName("ComputeSignatures_minhash");

    String inputPath = PwsimEnvironment.getIntDocvectorsFile(dir, fs);
    String outputPath = PwsimEnvironment.getSignaturesDir(dir, numInts, "minhash");  
View Full Code Here

Examples of ivory.core.RetrievalEnvironment.readCollectionTermCount()

        fs.exists(new Path(env.getCfByIntData()))) {
      LOG.info("term and term id data exist: skipping!");
      return 0;
    }

    conf.setInt(Constants.CollectionTermCount, (int) env.readCollectionTermCount());
    conf.set("mapreduce.map.memory.mb", "2048");
    conf.set("mapreduce.map.java.opts", "-Xmx2048m");
    conf.set("mapreduce.reduce.memory.mb", "2048");
    conf.set("mapreduce.reduce.java.opts", "-Xmx2048m");
View Full Code Here

Examples of ivory.core.RetrievalEnvironment.readCollectionTermCount()

    JobConf job = new JobConf(conf, ComputeSignaturesMinhash.class);
    // job.set("mapred.job.tracker", "local");
    // job.set("fs.default.name", "file:///");
    FileSystem fs = FileSystem.get(job);
    RetrievalEnvironment env = new RetrievalEnvironment(dir, fs);
    int vocabSize = (int) env.readCollectionTermCount();

    job.setJobName("ComputeSignatures_minhash");// +"_D="+D+"_"+RetrievalEnvironment.readCollectionName(fs,
                                                // dir));

    String inputPath = PwsimEnvironment.getFileNameWithPars(dir, "IntDocs");
View Full Code Here

Examples of ivory.core.RetrievalEnvironment.readCollectionTermCount()

    // Get CF and DF counts.
    startTime = System.currentTimeMillis();
    LOG.info("Counting terms...");
    exitCode = new ComputeGlobalTermStatistics(conf).run();
    LOG.info("TermCount = " + env.readCollectionTermCount());
    if (exitCode >= 0) {
      LOG.info("Job ComputeGlobalTermStatistics finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    }else {
      LOG.info("Error: ComputeGlobalTermStatistics. Terminating...");
      return -1;
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.