Examples of org.apache.hadoop.util.LineReader

org.apache.hadoop.util.LineReader
A class that provides a line reader from an input stream. Depending on the constructor used, lines will either be terminated by:
- one of the following: '\n' (LF), '\r' (CR), or '\r\n' (CR+LF).
- or, a custom byte sequence delimiter
In both cases, EOF also terminates an otherwise unterminated line.

  public String read() throws IOException  {
    if (readerHelper == null) {
      readerHelper = new ReaderHelper<LineReader, byte[]>(getInput(), getInputContext(), getSplit(), getCodec()) {
        @Override
        protected LineReader createReader(InputStream inputStream) throws IOException {
          LineReader lineReader = new LineReader(inputStream, delimiter);
          if (getContext().getStart() > 0) {
            processReadCount(lineReader.readLine(new Text()));
          }
          return lineReader;
        }


        @Override

View Full Code Here

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
      in = new LineReader(codec.createInputStream(fileIn), job);
      end = Long.MAX_VALUE;
    } else {
      if (start != 0) {
        skipFirstLine = true;
        --start;
        fileIn.seek(start);
      }
      in = new LineReader(fileIn, job);
    }
    if (skipFirstLine) { // skip first line and re-establish "start".
      start += in.readLine(new Text(), 0,
          (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }

View Full Code Here

    inputCodec = codecs.getCodec(p);
    FileSystem fs = p.getFileSystem(getConf());
    FSDataInputStream fileIn = fs.open(p);


    if (inputCodec == null) {
      return new LineReader(fileIn, getConf());
    } else {
      inputDecompressor = CodecPool.getDecompressor(inputCodec);
      return new LineReader(inputCodec.createInputStream(fileIn,
          inputDecompressor), getConf());
    }
  }

View Full Code Here

              pos, flen, SplittableCompressionCodec.READ_MODE.BYBLOCK);
        if (in.getAdjustedStart() >= flen) {
          break;
        }
        LOG.info("SAMPLE " + in.getAdjustedStart() + "," + in.getAdjustedEnd());
        final LineReader lreader = new LineReader(in);
        lreader.readLine(line); // ignore; likely partial
        if (in.getPos() >= flen) {
          break;
        }
        lreader.readLine(line);
        final int seq1 = readLeadingInt(line);
        lreader.readLine(line);
        if (in.getPos() >= flen) {
          break;
        }
        final int seq2 = readLeadingInt(line);
        assertEquals("Mismatched lines", seq1 + 1, seq2);

View Full Code Here

  // get the version of the filesystem from the masterindex file
  // the version is currently not useful since it's the first version
  // of archives
  public int getHarVersion() throws IOException { 
    FSDataInputStream masterIn = fs.open(masterIndex);
    LineReader lmaster = new LineReader(masterIn, getConf());
    Text line = new Text();
    lmaster.readLine(line);
    try {
      masterIn.close();
    } catch(IOException e){
      //disregard it.
      // its a read.

View Full Code Here

      List<String> children, FileStatus archiveIndexStat) throws IOException {
    // read the index file
    FSDataInputStream aIn = null;
    try {
      aIn = fs.open(archiveIndex);
      LineReader aLin;
      long read = 0;
      aLin = new LineReader(aIn, getConf());
      String parentString = parent.getName();
      Path harPath = new Path(parentString);
      int harlen = harPath.depth();
      Text line = new Text();
      while (read < archiveIndexStat.getLen()) {
        int tmp = aLin.readLine(line);
        read += tmp;
        String lineFeed = line.toString();
        String child = lineFeed.substring(0, lineFeed.indexOf(" "));
        if ((child.startsWith(parentString))) {
          Path thisPath = new Path(child);

View Full Code Here

    int hashCode = getHarHash(harPath);
    // get the master index to find the pos 
    // in the index file
    FSDataInputStream in = fs.open(masterIndex);
    FileStatus masterStat = fs.getFileStatus(masterIndex);
    LineReader lin = new LineReader(in, getConf());
    Text line = new Text();
    long read = lin.readLine(line);
   //ignore the first line. this is the header of the index files
    String[] readStr = null;
    List<Store> stores = new ArrayList<Store>();
    while(read < masterStat.getLen()) {
      int b = lin.readLine(line);
      read += b;
      readStr = line.toString().split(" ");
      int startHash = Integer.parseInt(readStr[0]);
      int endHash  = Integer.parseInt(readStr[1]);
      if (startHash <= hashCode && hashCode <= endHash) {
        stores.add(new Store(Long.parseLong(readStr[2]), 
            Long.parseLong(readStr[3]), startHash,
            endHash));
      }
      line.clear();
    }
    try {
      lin.close();
    } catch(IOException io){
      // do nothing just a read.
    }
    FSDataInputStream aIn = fs.open(archiveIndex);
    LineReader aLin;
    String retStr = null;
    // now start reading the real index file
    for (Store s: stores) {
      read = 0;
      aIn.seek(s.begin);
      aLin = new LineReader(aIn, getConf());
      while (read + s.begin < s.end) {
        int tmp = aLin.readLine(line);
        read += tmp;
        String lineFeed = line.toString();
        String[] parsed = lineFeed.split(" ");
        if (harPath.compareTo(new Path(parsed[0])) == 0) {
          // bingo!

View Full Code Here


    private void parseMetaData() throws IOException {
      Text line = new Text();
      long read;
      FSDataInputStream in = null;
      LineReader lin = null;


      try {
        in = fs.open(masterIndexPath);
        FileStatus masterStat = fs.getFileStatus(masterIndexPath);
        masterIndexTimestamp = masterStat.getModificationTime();
        lin = new LineReader(in, getConf());
        read = lin.readLine(line);


        // the first line contains the version of the index file
        String versionLine = line.toString();
        String[] arr = versionLine.split(" ");
        version = Integer.parseInt(arr[0]);
        // make it always backwards-compatible
        if (this.version > HarFileSystem.VERSION) {
          throw new IOException("Invalid version " + 
              this.version + " expected " + HarFileSystem.VERSION);
        }


        // each line contains a hashcode range and the index file name
        String[] readStr;
        while(read < masterStat.getLen()) {
          int b = lin.readLine(line);
          read += b;
          readStr = line.toString().split(" ");
          int startHash = Integer.parseInt(readStr[0]);
          int endHash  = Integer.parseInt(readStr[1]);
          stores.add(new Store(Long.parseLong(readStr[2]), 
              Long.parseLong(readStr[3]), startHash,
              endHash));
          line.clear();
        }
      } catch (IOException ioe) {
        LOG.warn("Encountered exception ", ioe);
        throw ioe;
      } finally {
        IOUtils.cleanup(LOG, lin, in);
      }


      FSDataInputStream aIn = fs.open(archiveIndexPath);
      try {
        FileStatus archiveStat = fs.getFileStatus(archiveIndexPath);
        archiveIndexTimestamp = archiveStat.getModificationTime();
        LineReader aLin;


        // now start reading the real index file
        for (Store s: stores) {
          read = 0;
          aIn.seek(s.begin);
          aLin = new LineReader(aIn, getConf());
          while (read + s.begin < s.end) {
            int tmp = aLin.readLine(line);
            read += tmp;
            String lineFeed = line.toString();
            String[] parsed = lineFeed.split(" ");
            parsed[0] = decodeFileName(parsed[0]);
            archive.put(new Path(parsed[0]), new HarStatus(lineFeed));

View Full Code Here

     * @throws IOException
     */
    public void bindTo(String fileName, BufferedPositionedInputStream is,
                       long offset, long end) throws IOException {
        this.istream  = is;
        this.in = new LineReader(istream);
    }

View Full Code Here

          continue;
        }
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        FSDataInputStream fileIn = fs.open(path);
        LineReader in = new LineReader(fileIn, job.getConfiguration());
        int lineLen = 0;
        while(true) {
          Text lineText = new Text();
          lineLen = in.readLine(lineText);
          if(lineLen <= 0) {
          break;
          }
          Matcher m = LINE_PATTERN.matcher(lineText.toString());
          if((m != null) && m.matches()) {
            TableName tableName = TableName.valueOf(m.group(1));
            int startRow = Integer.parseInt(m.group(2));
            int rows = Integer.parseInt(m.group(3));
            int totalRows = Integer.parseInt(m.group(4));
            int clients = Integer.parseInt(m.group(5));
            boolean flushCommits = Boolean.parseBoolean(m.group(6));
            boolean writeToWAL = Boolean.parseBoolean(m.group(7));


            LOG.debug("tableName=" + tableName +
                      " split["+ splitList.size() + "] " +
                      " startRow=" + startRow +
                      " rows=" + rows +
                      " totalRows=" + totalRows +
                      " clients=" + clients +
                      " flushCommits=" + flushCommits +
                      " writeToWAL=" + writeToWAL);


            PeInputSplit newSplit =
              new PeInputSplit(tableName, startRow, rows, totalRows, clients,
                flushCommits, writeToWAL);
            splitList.add(newSplit);
          }
        }
        in.close();
      }


      LOG.info("Total # of splits: " + splitList.size());
      return splitList;
    }

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.apache.hadoop.util.LineReader

co.nubetech.hiho.dedup.DelimitedLineRecordReader

com.cloudera.iterativereduce.io.HDFSLineParser

com.datasalt.pangool.tuplemr.mapred.lib.input.TupleTextInputFormat$TupleTextInputReader

com.hadoop.mapred.DeprecatedLzoLineRecordReader

com.hadoop.mapreduce.LzoLineRecordReader

com.twitter.elephantbird.mapreduce.input.LzoBinaryB64LineRecordReader

com.twitter.elephantbird.mapreduce.input.LzoJsonRecordReader

com.twitter.elephantbird.mapreduce.input.LzoLineRecordReader

com.twitter.elephantbird.mapreduce.input.LzoW3CLogRecordReader

edu.umd.cloud9.collection.aquaint2.Aquaint2DocnoMapping

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.