Examples of FileSplit


Examples of com.cloudera.iterativereduce.yarn.avro.generated.FileSplit

  for (InputSplit split : splits) {

    // Convert the Hadoop split into the Avro-generated Metronome split.
    FileSplit convertedToMetronomeSplit = new FileSplit();
    org.apache.hadoop.mapred.FileSplit hadoopFileSplit = (org.apache.hadoop.mapred.FileSplit) split;

    if (hadoopFileSplit.getLength() - hadoopFileSplit.getStart() > 0) {

      convertedToMetronomeSplit.length = hadoopFileSplit.getLength();
      convertedToMetronomeSplit.offset = hadoopFileSplit.getStart();
      convertedToMetronomeSplit.path = hadoopFileSplit.getPath().toString();

      StartupConfiguration config = StartupConfiguration.newBuilder()
          .setBatchSize(batchSize).setIterations(iterationCount)
          .setOther(appConfig).setSplit(convertedToMetronomeSplit).build();

      String wid = "worker-" + workerId;
      ConfigurationTuple tuple = new ConfigurationTuple(split.getLocations()[0], wid, config);

      configTuples.add(tuple);
      workerId++;

      System.out.println("IR_AM_worker: " + wid + " added split: " + convertedToMetronomeSplit.toString());

    } else {
      // The converted split is never populated on this branch, so log the
      // original Hadoop split rather than the empty Metronome one.
      System.out.println("IR_AM: Culled out 0 length Split: " + hadoopFileSplit.toString());
    }
  }
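
The Avro-generated Metronome FileSplit is a plain record whose public path, offset, and length fields are assigned directly, as above. A minimal sketch of populating one by hand (the path and size below are hypothetical, not from the original code):

    // Minimal sketch: fill the Avro-generated split's public fields directly.
    // The path and length here are hypothetical examples.
    FileSplit metronomeSplit = new FileSplit();
    metronomeSplit.path = "hdfs:///data/part-00000";
    metronomeSplit.offset = 0L;
    metronomeSplit.length = 64L * 1024 * 1024;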

Examples of edu.uci.ics.hyracks.dataflow.std.file.FileSplit

  XMLQueryCompiler compiler = new XMLQueryCompiler(null);
  File tempFile = File.createTempFile(testCase.getXQueryFile().getName(), ".tmp");
  tempFile.deleteOnExit();
  Reader in = new InputStreamReader(new FileInputStream(testCase.getXQueryFile()), "UTF-8");
  CompilerControlBlock ccb = new CompilerControlBlock(new StaticContextImpl(
      RootStaticContextImpl.INSTANCE), new FileSplit[] { new FileSplit("nc1",
      tempFile.getAbsolutePath()) });
  compiler.compile(testCase.getXQueryDisplayName(), in, ccb, opts.optimizationLevel);
  JobSpecification spec = compiler.getModule().getHyracksJobSpecification();

  DynamicContext dCtx = new DynamicContextImpl(compiler.getModule().getModuleContext());
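
Note that, unlike the Hadoop splits in the sections below, this Hyracks FileSplit pairs a node controller name with a file path rather than describing an offset/length range within a file. A minimal sketch with hypothetical node names and paths:

    // One split per node controller; each path must be local to that node.
    // Node names and paths here are hypothetical.
    FileSplit[] splits = new FileSplit[] {
        new FileSplit("nc1", "/tmp/input-part-0.xml"),
        new FileSplit("nc2", "/tmp/input-part-1.xml")
    };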

Examples of org.apache.hadoop.mapred.FileSplit

      for (; sl.next(key, value); last = sl.getPosition()) {
        // If adding this file would push the split past the target size,
        // cut the split here and start the next file in a new split.
        if (acc + key.get() > targetsize && acc != 0) {
          long splitsize = last - pos;
          splits.add(new FileSplit(src, pos, splitsize, job));
          cbrem -= splitsize;
          pos = last;
          acc = 0L;
        }
        acc += key.get();
      }
      if (cbrem != 0) {
        splits.add(new FileSplit(src, pos, cbrem, job));
      }

      return splits.toArray(new FileSplit[splits.size()]);
    }
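
This accumulate-and-cut pattern recurs in several of the mapred examples on this page: keep a running byte total and cut a split whenever the next record would overshoot the target size. A self-contained sketch of the same logic, with plain arrays standing in for the SequenceFile listing (the class name, array parameters, and totalBytes are stand-ins, not part of the original code):

    import java.util.ArrayList;
    import java.util.List;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapred.FileSplit;

    class SplitPacker {
      // sizes[i] is the byte size of record i; ends[i] is the file position
      // just past record i (what sl.getPosition() returns in the loop above).
      static FileSplit[] pack(Path src, long[] sizes, long[] ends,
                              long targetsize, long totalBytes) {
        List<FileSplit> splits = new ArrayList<FileSplit>();
        long pos = 0;            // start of the split being built
        long last = 0;           // position just past the last record consumed
        long acc = 0;            // bytes accumulated in the current split
        long cbrem = totalBytes; // bytes not yet assigned to a split
        for (int i = 0; i < sizes.length; i++) {
          if (acc + sizes[i] > targetsize && acc != 0) {
            long splitsize = last - pos;
            splits.add(new FileSplit(src, pos, splitsize, (String[]) null));
            cbrem -= splitsize;
            pos = last;
            acc = 0L;
          }
          acc += sizes[i];
          last = ends[i];
        }
        if (cbrem != 0) {
          // Whatever is left over becomes the final split.
          splits.add(new FileSplit(src, pos, cbrem, (String[]) null));
        }
        return splits.toArray(new FileSplit[splits.size()]);
      }
    }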

Examples of org.apache.hadoop.mapred.FileSplit

        Path jsonFilePath = new Path(jsonDataFile);
        FileStatus jsonFileStatus = jsonFilePath.getFileSystem(new Configuration())
                                                .listStatus(jsonFilePath)[0];
        final SequenceFileRecordReader<BytesWritable, BytesWritable> reader =
            new SequenceFileRecordReader<BytesWritable, BytesWritable>(
                new Configuration(),
                new FileSplit(jsonFilePath, 0, jsonFileStatus.getLen(), (String[]) null));

        PerformanceTest readWriteTest = new PerformanceTest() {
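
Constructing a split from offset 0 through the file's full length, as above, is the usual way to cover an entire file with a single split. The same construction as a small helper; the class and method names are hypothetical:

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapred.FileSplit;

    class SplitUtil {
      // One split spanning the whole file; a null hosts array means
      // "no locality hints".
      static FileSplit wholeFile(Path path) throws IOException {
        FileStatus status = path.getFileSystem(new Configuration()).getFileStatus(path);
        return new FileSplit(path, 0, status.getLen(), (String[]) null);
      }
    }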

Examples of org.apache.hadoop.mapred.FileSplit

  @Override
  public RecordReader<NullWritable, OrcLazyRow>
      getRecordReader(InputSplit inputSplit, JobConf conf,
                      Reporter reporter) throws IOException {
    ReaderWriterProfiler.setProfilerOptions(conf);
    FileSplit fileSplit = (FileSplit) inputSplit;
    Path path = fileSplit.getPath();
    FileSystem fs = path.getFileSystem(conf);
    reporter.setStatus(fileSplit.toString());

    try {
      return new OrcRecordReader(
          OrcFile.createReader(fs, path, conf),
          conf,
          fileSplit.getStart(),
          fileSplit.getLength());
    } catch (IndexOutOfBoundsException e) {
      /**
       * When a non-ORC file is read by the ORC reader, an IndexOutOfBoundsException
       * is thrown while creating the reader. Catch that exception and check the
       * file header to see whether the input file is ORC or not. If it is not ORC,
       * throw a NotAnORCFileException with the file
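
A minimal sketch of driving the reader this method returns, using the old mapred RecordReader contract (inputFormat, split, conf, and reporter are assumed to be in scope):

    RecordReader<NullWritable, OrcLazyRow> reader =
        inputFormat.getRecordReader(split, conf, reporter);
    try {
      NullWritable key = reader.createKey();
      OrcLazyRow row = reader.createValue();
      while (reader.next(key, row)) {
        // consume row
      }
    } finally {
      reader.close();
    }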

Examples of org.apache.hadoop.mapred.FileSplit

      try {
        reader = new SequenceFile.Reader(fs, src, jconf);
        while(reader.next(key, value)) {
          if (currentCount + key.get() > targetSize && currentCount != 0){
            long size = lastPos - startPos;
            splits.add(new FileSplit(src, startPos, size, (String[]) null));
            remaining = remaining - size;
            startPos = lastPos;
            currentCount = 0L;
          }
          currentCount += key.get();
          lastPos = reader.getPosition();
        }
        // Add whatever remains as the final split.
        if (remaining != 0) {
          splits.add(new FileSplit(src, startPos, remaining, (String[])null));
        }
      }
      finally {
        reader.close();
      }

Examples of org.apache.hadoop.mapred.FileSplit

        for (; sl.next(key, value); last = sl.getPosition()) {
          // If adding this file would push the split past the target size,
          // cut the split here and start the next file in a new split.
          if (acc + key.get() > targetsize && acc != 0) {
            long splitsize = last - pos;
            splits.add(new FileSplit(src, pos, splitsize, (String[])null));
            cbrem -= splitsize;
            pos = last;
            acc = 0L;
          }
          acc += key.get();
        }
      }
      finally {
        checkAndClose(sl);
      }
      if (cbrem != 0) {
        splits.add(new FileSplit(src, pos, cbrem, (String[])null));
      }

      return splits.toArray(new FileSplit[splits.size()]);
    }

Examples of org.apache.hadoop.mapred.FileSplit

       
        long[] starts = new long[inputSplits.length];
        long[] lengths = new long[inputSplits.length];
        Path[] paths = new Path[inputSplits.length];
        for (int j = 0; j < inputSplits.length; j++) {
          FileSplit fileSplit = (FileSplit) inputSplits[j];
          starts[j] = fileSplit.getStart();
          lengths[j] = fileSplit.getLength();
          paths[j] = fileSplit.getPath();
        }

Examples of org.apache.hadoop.mapred.FileSplit

      @Override
      public void map(LongWritable key, Text value,
          OutputCollector<BytesWritable, Tuple> output, Reporter reporter)
          throws IOException {
        if (filePath == null) {
          FileSplit split = (FileSplit) reporter.getInputSplit();
          filePath = split.getPath().toString();
        }
        String line = value.toString();
        StringTokenizer st = new StringTokenizer(line, " ");
        while (st.hasMoreElements()) {
          byte[] word = st.nextToken().getBytes();
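
Reading the path off the reporter's split, as above, only works when the input split really is a FileSplit. A sketch of an alternative with the old mapred API, which also publishes the current input path into the job configuration (the classic property name is "map.input.file"; newer Hadoop versions expose it as "mapreduce.map.input.file"):

    // Sketch: pick the input path up in configure() instead of in map().
    public void configure(JobConf job) {
      filePath = job.get("map.input.file");
    }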