Examples of FSLineReader


Examples of edu.umd.cloud9.io.FSLineReader

        if (fileStats[i].getPath().getName().startsWith("_")) {
          continue;
        }

        LOG.info("processing " + fileStats[i].getPath());
        FSLineReader reader = new FSLineReader(fileStats[i].getPath(), fs);

        Text line = new Text();
        while (reader.readLine(line) > 0) {
          String[] arr = line.toString().split("\\t+", 2);

          int docno = Integer.parseInt(arr[0]);
          int len = Integer.parseInt(arr[1]);

          // Note that because of speculative execution there may be
          // multiple copies of doclength data. Therefore, we can't
          // just count number of doclengths read. Instead, keep track
          // of largest docno encountered.
          if (docno < docnoOffset) {
            throw new RuntimeException(
                "Error: docno " + docno + " < docnoOffset " + docnoOffset + "!");
          }

          doclengths[docno - docnoOffset] = len;

          if (docno > maxDocno) {
            maxDocno = docno;
          }
          if (docno < minDocno) {
            minDocno = docno;
          }
        }
        reader.close();
        context.getCounter(DocLengths.Files).increment(1);
      }

      LOG.info("min docno: " + minDocno);
      LOG.info("max docno: " + maxDocno);
View Full Code Here

Examples of edu.umd.cloud9.io.FSLineReader

      }

      if(localFiles!=null && localFiles.length > 0){
        samplesMap = new HMapIIW();
        try {
          FSLineReader reader = new FSLineReader(localFiles[0], FileSystem.getLocal(job));
          Text t = new Text();
          while(reader.readLine(t)!=0){
            int docno = Integer.parseInt(t.toString());
            samplesMap.put(docno, 1);
          }
          reader.close();
        } catch (IOException e1) {
        }
        sLogger.info(samplesMap);
      }
    }
View Full Code Here

Examples of edu.umd.cloud9.io.FSLineReader

      // if cache is non-empty, a docnos file has been entered
      if(localFiles != null){
        sLogger.setLevel(Level.INFO);
        samplesMap = new HMapII();
        try {
          FSLineReader reader = new FSLineReader(localFiles[0], FileSystem.getLocal(conf));
          Text t = new Text();
          while(reader.readLine(t)!=0){
            int docno = Integer.parseInt(t.toString());
            samplesMap.put(docno, 1);
          }
          reader.close();
        } catch (IOException e1) {
        }
        sLogger.info(samplesMap);
      }
    }
View Full Code Here

Examples of edu.umd.cloud9.io.FSLineReader

      FileSystem fs = FileSystem.get(conf);

      sLogger.info("reading " + inputFile);

      FSLineReader reader = new FSLineReader(new Path(inputFile), fs);
      FSDataOutputStream writer = fs.create(new Path(outputFile), true);

      Text line = new Text();
      while (reader.readLine(line) > 0) {
        String[] arr = line.toString().split("\\s+");

        String docid = arr[2];
        int rank = Integer.parseInt(arr[3]);

        long start = System.currentTimeMillis();
        ClueWarcRecord doc = null;

        for (int i = 0; i < 10; i++) {
          doc = indexes[i].getDocument(docid);
          if (doc != null)
            break;
        }
        String url = doc.getHeaderMetadataItem("WARC-Target-URI");
        long duration = System.currentTimeMillis() - start;

        reporter.incrCounter(MyCounter.Count, 1);
        reporter.incrCounter(MyCounter.Time, duration);

        if (rank == 1 || rank % 100 == 0)
          sLogger.info(line + " " + url + " (" + duration + "ms)");
        writer.write(new String(line + " " + url + "\n").getBytes());
      }

      reader.close();
      writer.close();

    }
View Full Code Here

Examples of edu.umd.cloud9.io.FSLineReader

      FileSystem fs = FileSystem.get(conf);

      sLogger.info("reading " + inputFile);

      FSLineReader reader = new FSLineReader(new Path(inputFile), fs);
      FSDataOutputStream writer = fs.create(new Path(outputFile), true);

      Text line = new Text();
      while (reader.readLine(line) > 0) {
        String[] arr = line.toString().split("\\s+");

        String docid = arr[2];
        int rank = Integer.parseInt(arr[3]);

        long start = System.currentTimeMillis();
        String url = findex.getDocument(docid).getHeaderMetadataItem("WARC-Target-URI");
        long duration = System.currentTimeMillis() - start;

        reporter.incrCounter(MyCounter.Count, 1);
        reporter.incrCounter(MyCounter.Time, duration);

        if (rank == 1 || rank % 100 == 0)
          sLogger.info(line + " " + url + " (" + duration + "ms)");
        writer.write(new String(line + " " + url + "\n").getBytes());
      }

      reader.close();
      writer.close();

    }
View Full Code Here

Examples of edu.umd.cloud9.io.FSLineReader

     
      if(localFiles.length > 1){
        dotProductThresholds = new float[D];
        int i = 0;
        try {
          FSLineReader reader = new FSLineReader(localFiles[1], FileSystem.getLocal(job));
          Text t = new Text();
          while(reader.readLine(t)!=0){
            float val = Float.parseFloat(t.toString());
            sLogger.debug(i + " --> "+val);
            dotProductThresholds[i] = val;
          }
          reader.close();
        } catch (IOException e1) {
        }
        sLogger.info("Dot product thresholds read");
      }else{
        sLogger.info("Dot product thresholds file not specified in option Ivory.DotProdThreshFile");
View Full Code Here

Examples of edu.umd.cloud9.io.FSLineReader

      }

      if(localFiles != null && localFiles.length > 0){
        samplesMap = new HMapIIW();
        try {
          FSLineReader reader = new FSLineReader(conf.get("Ivory.SampleFile"), FileSystem.get(conf));
          Text t = new Text();
          while(reader.readLine(t)!=0){
            int docno = Integer.parseInt(t.toString());
            sLogger.info(docno + " --> sample");
            samplesMap.put(docno, 1);
          }
          reader.close();
        } catch (IOException e1) {
        }
        sLogger.info(samplesMap.size()+" sampled");
      }else{
        sLogger.info("samples file does not exist");
View Full Code Here

Examples of edu.umd.cloud9.io.FSLineReader

      }

      if(localFiles != null && localFiles.length > 0){
        samplesMap = new HMapIIW();
        try {
          FSLineReader reader = new FSLineReader(localFiles[0], FileSystem.getLocal(job));
          Text t = new Text();
          while(reader.readLine(t)!=0){
            int docno = Integer.parseInt(t.toString());
            sLogger.info(docno + " --> sample");
            samplesMap.put(docno, 1);
          }
          reader.close();
        } catch (IOException e1) {
        }
        sLogger.info(samplesMap.size()+" sampled");
      }else{
        sLogger.info("samples file not specified in local cache");
View Full Code Here

Examples of edu.umd.cloud9.io.FSLineReader

      }

      if(localFiles != null && localFiles.length > 0){
        samplesMap = new HMapIIW();
        try {
          FSLineReader reader = new FSLineReader(localFiles[0], FileSystem.getLocal(job));
          Text t = new Text();
          while(reader.readLine(t)!=0){
            int docno = Integer.parseInt(t.toString());
            sLogger.info(docno + " --> sample");
            samplesMap.put(docno, 1);
          }
          reader.close();
        } catch (IOException e1) {
        }
        sLogger.info(samplesMap.size()+" sampled");
      }else{
        sLogger.info("samples file not specified in option SampleDocnosFile");
View Full Code Here

Examples of edu.umd.cloud9.io.FSLineReader

     
      sampleDocnosFile = job.get("SampleDocnosFile");
      if(sampleDocnosFile!=null){
        samplesMap = new HMapIIW();
        try {
          FSLineReader reader = new FSLineReader(sampleDocnosFile);
          Text t = new Text();
          while(reader.readLine(t)!=0){
            int docno = Integer.parseInt(t.toString());
            samplesMap.put(docno, 1);
          }
          reader.close();
        } catch (IOException e1) {
        }
      }
     
      mDocMapping = new WikipediaDocnoMapping();
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.