Package eu.stratosphere.core.fs

Examples of eu.stratosphere.core.fs.FileInputSplit


 
  @Test
  public void testReadTooShortInput() throws IOException {
    try {
      final String fileContent = "111|222|333|444\n666|777|888|999";
      final FileInputSplit split = createTempFile(fileContent)
   
      final Configuration parameters = new Configuration();
      format.setFieldDelimiter('|');
      format.setFieldTypesGeneric(IntValue.class, IntValue.class, IntValue.class, IntValue.class, IntValue.class);
     
View Full Code Here


 
  @Test
  public void testReadTooShortInputLenient() throws IOException {
    try {
      final String fileContent = "666|777|888|999|555\n111|222|333|444\n666|777|888|999|555";
      final FileInputSplit split = createTempFile(fileContent)
   
      final Configuration parameters = new Configuration();
      format.setFieldDelimiter('|');
      format.setFieldTypesGeneric(IntValue.class, IntValue.class, IntValue.class, IntValue.class, IntValue.class);
      format.setLenient(true);
View Full Code Here

 
  @Test
  public void testReadInvalidContents() throws IOException {
    try {
      final String fileContent = "abc|222|def|444\nkkz|777|888|hhg";
      final FileInputSplit split = createTempFile(fileContent);
   
      final Configuration parameters = new Configuration();

      format.setFieldDelimiter('|');
      format.setFieldTypesGeneric(StringValue.class, IntValue.class, StringValue.class, IntValue.class);
View Full Code Here

 
  @Test
  public void testReadInvalidContentsLenient() {
    try {
      final String fileContent = "abc|222|def|444\nkkz|777|888|hhg";
      final FileInputSplit split = createTempFile(fileContent)
   
      final Configuration parameters = new Configuration();

      format.setFieldDelimiter('|');
      format.setFieldTypesGeneric(StringValue.class, IntValue.class, StringValue.class, IntValue.class);
View Full Code Here

    try {
      final String fileContent = "abc|dfgsdf|777|444\n" // good line
                  "kkz|777|foobar|hhg\n" // wrong data type in field
                  "kkz|777foobarhhg  \n" // too short, a skipped field never ends
                  "xyx|ignored|42|\n";      // another good line
      final FileInputSplit split = createTempFile(fileContent)
   
      final Configuration parameters = new Configuration();

      format.setFieldDelimiter('|');
      format.setFieldTypesGeneric(StringValue.class, null, IntValue.class);
View Full Code Here

 
  @Test
  public void readWithEmptyField() {
    try {
      final String fileContent = "abc|def|ghijk\nabc||hhg\n|||";
      final FileInputSplit split = createTempFile(fileContent)
   
      final Configuration parameters = new Configuration();

      format.setFieldDelimiter('|');
      format.setFieldTypesGeneric(StringValue.class, StringValue.class, StringValue.class);
View Full Code Here

    try {
      final String fileContent = "colname-1|colname-2|some name 3|column four|\n" +
                  "123|abc|456|def|\n"+
                  "987|xyz|654|pqr|\n";
     
      final FileInputSplit split = createTempFile(fileContent);
   
      final Configuration parameters = new Configuration();

      format.setFieldDelimiter('|');
      format.setFieldTypesGeneric(IntValue.class, StringValue.class, IntValue.class, StringValue.class);
View Full Code Here

      final String fileContent = "colname-1|colname-2|some name 3|column four|\n" +
                  "123|abc|456|def|\n"+
                  "colname-1|colname-2|some name 3|column four|\n" // repeated header in the middle
                  "987|xyz|654|pqr|\n";
     
      final FileInputSplit split = createTempFile(fileContent);
   
      final Configuration parameters = new Configuration();

      format.setFieldDelimiter('|');
      format.setFieldTypesGeneric(IntValue.class, StringValue.class, IntValue.class, StringValue.class);
View Full Code Here

   
    DataOutputStream dos = new DataOutputStream(new FileOutputStream(tempFile));
    dos.writeBytes(content);
    dos.close();
     
    return new FileInputSplit(0, new Path(this.tempFile.toURI().toString()), 0, this.tempFile.length(), new String[] {"localhost"});
  }
View Full Code Here

        }
        long len = file.getLen();
        if(testForUnsplittable(file)) {
          len = READ_WHOLE_SPLIT_FLAG;
        }
        FileInputSplit fis = new FileInputSplit(splitNum++, file.getPath(), 0, len,
            hosts.toArray(new String[hosts.size()]));
        inputSplits.add(fis);
      }
      return inputSplits.toArray(new FileInputSplit[inputSplits.size()]);
    }
   

    final long maxSplitSize = (minNumSplits < 1) ? Long.MAX_VALUE : (totalLength / minNumSplits +
          (totalLength % minNumSplits == 0 ? 0 : 1));

    // now that we have the files, generate the splits
    int splitNum = 0;
    for (final FileStatus file : files) {

      final long len = file.getLen();
      final long blockSize = file.getBlockSize();
     
      final long minSplitSize;
      if (this.minSplitSize <= blockSize) {
        minSplitSize = this.minSplitSize;
      }
      else {
        if (LOG.isWarnEnabled()) {
          LOG.warn("Minimal split size of " + this.minSplitSize + " is larger than the block size of " +
            blockSize + ". Decreasing minimal split size to block size.");
        }
        minSplitSize = blockSize;
      }

      final long splitSize = Math.max(minSplitSize, Math.min(maxSplitSize, blockSize));
      final long halfSplit = splitSize >>> 1;

      final long maxBytesForLastSplit = (long) (splitSize * MAX_SPLIT_SIZE_DISCREPANCY);

      if (len > 0) {

        // get the block locations and make sure they are in order with respect to their offset
        final BlockLocation[] blocks = fs.getFileBlockLocations(file, 0, len);
        Arrays.sort(blocks);

        long bytesUnassigned = len;
        long position = 0;

        int blockIndex = 0;

        while (bytesUnassigned > maxBytesForLastSplit) {
          // get the block containing the majority of the data
          blockIndex = getBlockIndexForPosition(blocks, position, halfSplit, blockIndex);
          // create a new split
          FileInputSplit fis = new FileInputSplit(splitNum++, file.getPath(), position, splitSize,
            blocks[blockIndex].getHosts());
          inputSplits.add(fis);

          // adjust the positions
          position += splitSize;
          bytesUnassigned -= splitSize;
        }

        // assign the last split
        if (bytesUnassigned > 0) {
          blockIndex = getBlockIndexForPosition(blocks, position, halfSplit, blockIndex);
          final FileInputSplit fis = new FileInputSplit(splitNum++, file.getPath(), position,
            bytesUnassigned, blocks[blockIndex].getHosts());
          inputSplits.add(fis);
        }
      } else {
        // special case with a file of zero bytes size
        final BlockLocation[] blocks = fs.getFileBlockLocations(file, 0, 0);
        String[] hosts;
        if (blocks.length > 0) {
          hosts = blocks[0].getHosts();
        } else {
          hosts = new String[0];
        }
        final FileInputSplit fis = new FileInputSplit(splitNum++, file.getPath(), 0, 0, hosts);
        inputSplits.add(fis);
      }
    }

    return inputSplits.toArray(new FileInputSplit[inputSplits.size()]);
View Full Code Here

TOP

Related Classes of eu.stratosphere.core.fs.FileInputSplit

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.