Examples of FileSplit


Examples of org.apache.hadoop.mapred.FileSplit

      return null;
    }
    Path p = new Path(path);
    long start = Long.parseLong(job.get("map.input.start"));
    long length = Long.parseLong(job.get("map.input.length"));
    return new FileSplit(p, start, length, job);
  }
View Full Code Here

Examples of org.apache.hadoop.mapred.FileSplit

      int tableIndex = 0, fileNumber = 0;
      Integer[] fileNumbers = helper.getFileNumbers();
      if (fileNumbers.length != realReaders.size())
        throw new IOException("Number of tables in input paths of input splits is incorrect.");
      for (int j=0; j<inputSplits.length; j++) {
        FileSplit fileSplit = (FileSplit) inputSplits[j];
        start = fileSplit.getStart();
        if (start <= prevStart)
        {
          fileNumber++;
          if (fileNumber >= fileNumbers[tableIndex])
          {
            inputSplitBoundaries[tableIndex++] = j;
            fileNumber = 0;
          }
        }
        prevStart = start;
      }
      inputSplitBoundaries[tableIndex++] =  inputSplits.length;
      if (tableIndex != realReaders.size())
        throw new IOException("Number of tables in input splits is incorrect.");
      for (tableIndex = 0; tableIndex < realReaders.size(); tableIndex++)
      {
        int startSplitIndex = (tableIndex == 0 ? 0 : inputSplitBoundaries[tableIndex - 1]);
        int splitLen = (tableIndex == 0 ? inputSplitBoundaries[0] :
            inputSplitBoundaries[tableIndex] - inputSplitBoundaries[tableIndex-1]);
        BasicTable.Reader reader = realReaders.get(tableIndex);
        /* Get the index of the column group that will be used for row-split.*/
        int splitCGIndex = reader.getRowSplitCGIndex();
       
        long starts[] = new long[splitLen];
        long lengths[] = new long[splitLen];
        int batches[] = new int[splitLen + 1];
        batches[0] = 0;
        int numBatches = 0;
        int batchSize = 0;
        Path paths[] = new Path [splitLen];
        long totalLen = 0;
        final double SPLIT_SLOP = 1.1;
        int endSplitIndex = startSplitIndex + splitLen;
        for (int j=startSplitIndex; j< endSplitIndex; j++) {
          FileSplit fileSplit = (FileSplit) inputSplits[j];
          Path p = fileSplit.getPath();
          long blockSize = p.getFileSystem(conf).getBlockSize(p);
          long splitSize = (long) (helper.computeSplitSize(goalSize, minSize, blockSize) * SPLIT_SLOP);
          start = fileSplit.getStart();
          long length = fileSplit.getLength();
          int index = j - startSplitIndex;
          starts[index] = start;
          lengths[index] = length;
          totalLen += length;
          paths[index] = p;
View Full Code Here

Examples of org.apache.hadoop.mapred.FileSplit

           sl.next(key, value); last = sl.getPosition()) {
        // if adding this split would put this split past the target size,
        // cut the last split and put this next file in the next split.
        if (acc + key.get() > targetsize && acc != 0) {
          long splitsize = last - pos;
          splits.add(new FileSplit(src, pos, splitsize, job));
          cbrem -= splitsize;
          pos = last;
          acc = 0L;
        }
        acc += key.get();
      }
      if (cbrem != 0) {
        splits.add(new FileSplit(src, pos, cbrem, job));
      }

      return splits.toArray(new FileSplit[splits.size()]);
    }
View Full Code Here

Examples of org.apache.hadoop.mapred.FileSplit

  public static class SymlinkTextInputSplit extends FileSplit {
    private final FileSplit split;

    // No-argument constructor: initializes both the superclass and the wrapped
    // delegate split with placeholder null/zero values. NOTE(review): a no-arg
    // constructor like this is typically required for Writable-style
    // deserialization, with real values filled in later — confirm against the
    // framework contract before relying on the placeholder state.
    public SymlinkTextInputSplit() {
      super((Path)null, 0, 0, (String[])null);
      split = new FileSplit((Path)null, 0, 0, (String[])null);
    }
View Full Code Here

Examples of org.apache.hadoop.mapred.FileSplit

          // we may miss a sync here
          HiveInputSplit newSplit = split;
          if (split.inputFormatClassName().contains("RCFile")
              || split.inputFormatClassName().contains("SequenceFile")) {
            if (split.getStart() > SequenceFile.SYNC_INTERVAL) {
              newSplit = new HiveInputSplit(new FileSplit(split.getPath(), split
                  .getStart()
                  - SequenceFile.SYNC_INTERVAL, split.getLength()
                  + SequenceFile.SYNC_INTERVAL, split.getLocations()), split
                  .inputFormatClassName());
            }
View Full Code Here

Examples of org.apache.hadoop.mapred.FileSplit

        Reporter reporter) throws IOException {
      InputFormat indirIF = (InputFormat)ReflectionUtils.newInstance(
          job.getClass("mapred.indirect.input.format",
            SequenceFileInputFormat.class), job);
      IndirectSplit is = ((IndirectSplit)split);
      return indirIF.getRecordReader(new FileSplit(is.getPath(), 0,
            is.getLength(), job),
          job, reporter);
    }
View Full Code Here

Examples of org.apache.hadoop.mapreduce.lib.input.FileSplit

        private String flag;// A同现矩阵 or B评分矩阵

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            FileSplit split = (FileSplit) context.getInputSplit();
            flag = split.getPath().getParent().getName();// 判断读的数据集

            // System.out.println(flag);
        }
View Full Code Here

Examples of org.apache.hadoop.mapreduce.lib.input.FileSplit

        private int rowIndexA = 1; // 矩阵A,当前在第几行
        private int rowIndexB = 1; // 矩阵B,当前在第几行

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            FileSplit split = (FileSplit) context.getInputSplit();
            flag = split.getPath().getName();// 判断读的数据集
        }
View Full Code Here

Examples of org.apache.hadoop.mapreduce.lib.input.FileSplit

        private int rowNum = 4;// 矩阵A的行数
        private int colNum = 2;// 矩阵B的列数

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            FileSplit split = (FileSplit) context.getInputSplit();
            flag = split.getPath().getName();// 判断读的数据集
        }
View Full Code Here

Examples of org.apache.hadoop.mapreduce.lib.input.FileSplit

        private String flag;// tmp1 or result

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            FileSplit split = (FileSplit) context.getInputSplit();
            flag = split.getPath().getParent().getName();// 判断读的数据集
        }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.