Package org.apache.flink.core.fs

Examples of org.apache.flink.core.fs.FileInputSplit


        }
        long len = file.getLen();
        if(testForUnsplittable(file)) {
          len = READ_WHOLE_SPLIT_FLAG;
        }
        FileInputSplit fis = new FileInputSplit(splitNum++, file.getPath(), 0, len,
            hosts.toArray(new String[hosts.size()]));
        inputSplits.add(fis);
      }
      return inputSplits.toArray(new FileInputSplit[inputSplits.size()]);
    }
   

    final long maxSplitSize = (minNumSplits < 1) ? Long.MAX_VALUE : (totalLength / minNumSplits +
          (totalLength % minNumSplits == 0 ? 0 : 1));

    // now that we have the files, generate the splits
    int splitNum = 0;
    for (final FileStatus file : files) {

      final long len = file.getLen();
      final long blockSize = file.getBlockSize();
     
      final long minSplitSize;
      if (this.minSplitSize <= blockSize) {
        minSplitSize = this.minSplitSize;
      }
      else {
        if (LOG.isWarnEnabled()) {
          LOG.warn("Minimal split size of " + this.minSplitSize + " is larger than the block size of " +
            blockSize + ". Decreasing minimal split size to block size.");
        }
        minSplitSize = blockSize;
      }

      final long splitSize = Math.max(minSplitSize, Math.min(maxSplitSize, blockSize));
      final long halfSplit = splitSize >>> 1;

      final long maxBytesForLastSplit = (long) (splitSize * MAX_SPLIT_SIZE_DISCREPANCY);

      if (len > 0) {

        // get the block locations and make sure they are in order with respect to their offset
        final BlockLocation[] blocks = fs.getFileBlockLocations(file, 0, len);
        Arrays.sort(blocks);

        long bytesUnassigned = len;
        long position = 0;

        int blockIndex = 0;

        while (bytesUnassigned > maxBytesForLastSplit) {
          // get the block containing the majority of the data
          blockIndex = getBlockIndexForPosition(blocks, position, halfSplit, blockIndex);
          // create a new split
          FileInputSplit fis = new FileInputSplit(splitNum++, file.getPath(), position, splitSize,
            blocks[blockIndex].getHosts());
          inputSplits.add(fis);

          // adjust the positions
          position += splitSize;
          bytesUnassigned -= splitSize;
        }

        // assign the last split
        if (bytesUnassigned > 0) {
          blockIndex = getBlockIndexForPosition(blocks, position, halfSplit, blockIndex);
          final FileInputSplit fis = new FileInputSplit(splitNum++, file.getPath(), position,
            bytesUnassigned, blocks[blockIndex].getHosts());
          inputSplits.add(fis);
        }
      } else {
        // special case with a file of zero bytes size
        final BlockLocation[] blocks = fs.getFileBlockLocations(file, 0, 0);
        String[] hosts;
        if (blocks.length > 0) {
          hosts = blocks[0].getHosts();
        } else {
          hosts = new String[0];
        }
        final FileInputSplit fis = new FileInputSplit(splitNum++, file.getPath(), 0, 0, hosts);
        inputSplits.add(fis);
      }
    }

    return inputSplits.toArray(new FileInputSplit[inputSplits.size()]);
View Full Code Here


    final FileSystem fs = FileSystem.get(normalizedPath.toUri());
    FileStatus fileStatus = fs.getFileStatus(normalizedPath);

    BlockLocation[] blocks = fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
    inputFormat.open(new FileInputSplit(0, new Path(path), 0, fileStatus.getLen(), blocks[0].getHosts()));
    return inputFormat;
  }
View Full Code Here

 
  @Test
  public void testReadNoPosAll() throws IOException {
    try {
      final String fileContent = "111|222|333|444|555\n666|777|888|999|000|";
      final FileInputSplit split = createTempFile(fileContent)
   
      final Configuration parameters = new Configuration();
     
      format.setFieldDelimiter('|');
      format.setFieldTypesGeneric(IntValue.class, IntValue.class, IntValue.class, IntValue.class, IntValue.class);
View Full Code Here

 
  @Test
  public void testReadNoPosFirstN() throws IOException {
    try {
      final String fileContent = "111|222|333|444|555|\n666|777|888|999|000|";
      final FileInputSplit split = createTempFile(fileContent)
   
      final Configuration parameters = new Configuration();
     
      format.setFieldDelimiter('|');
      format.setFieldTypesGeneric(IntValue.class, IntValue.class);
View Full Code Here

 
  @Test
  public void testSparseParse() {
    try {
      final String fileContent = "111|222|333|444|555|666|777|888|999|000|\n000|999|888|777|666|555|444|333|222|111|";
      final FileInputSplit split = createTempFile(fileContent)
   
      final Configuration parameters = new Configuration();
     
      format.setFieldDelimiter('|');
      format.setFieldTypesGeneric(IntValue.class, null, null, IntValue.class, null, null, null, IntValue.class);
View Full Code Here

  @Test
  public void testLongLongLong() {
    try {
      final String fileContent = "1,2,3\n3,2,1";
      final FileInputSplit split = createTempFile(fileContent);

      final Configuration parameters = new Configuration();

      format.setFieldDelimiter(',');
      format.setFieldTypesGeneric(LongValue.class, LongValue.class, LongValue.class);
View Full Code Here

  @SuppressWarnings("unchecked")
  @Test
  public void testSparseParseWithIndices() {
    try {
      final String fileContent = "111|222|333|444|555|666|777|888|999|000|\n000|999|888|777|666|555|444|333|222|111|";
      final FileInputSplit split = createTempFile(fileContent);

      final Configuration parameters = new Configuration();

      format.setFieldDelimiter('|');
      format.setFieldsGeneric(new int[] { 0, 3, 7 },
View Full Code Here

 
  @Test
  public void testReadTooShortInput() throws IOException {
    try {
      final String fileContent = "111|222|333|444\n666|777|888|999";
      final FileInputSplit split = createTempFile(fileContent)
   
      final Configuration parameters = new Configuration();
      format.setFieldDelimiter('|');
      format.setFieldTypesGeneric(IntValue.class, IntValue.class, IntValue.class, IntValue.class, IntValue.class);
     
View Full Code Here

 
  @Test
  public void testReadTooShortInputLenient() throws IOException {
    try {
      final String fileContent = "666|777|888|999|555\n111|222|333|444\n666|777|888|999|555";
      final FileInputSplit split = createTempFile(fileContent)
   
      final Configuration parameters = new Configuration();
      format.setFieldDelimiter('|');
      format.setFieldTypesGeneric(IntValue.class, IntValue.class, IntValue.class, IntValue.class, IntValue.class);
      format.setLenient(true);
View Full Code Here

 
  @Test
  public void testReadInvalidContents() throws IOException {
    try {
      final String fileContent = "abc|222|def|444\nkkz|777|888|hhg";
      final FileInputSplit split = createTempFile(fileContent);
   
      final Configuration parameters = new Configuration();

      format.setFieldDelimiter('|');
      format.setFieldTypesGeneric(StringValue.class, IntValue.class, StringValue.class, IntValue.class);
View Full Code Here

TOP

Related Classes of org.apache.flink.core.fs.FileInputSplit

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.