Package eu.stratosphere.api.common.io

Source Code of eu.stratosphere.api.common.io.DelimitedInputFormatTest$MyTextInputFormat

/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/

package eu.stratosphere.api.common.io;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStreamWriter;

import org.apache.log4j.Level;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;

import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.core.fs.FileInputSplit;
import eu.stratosphere.core.fs.Path;
import eu.stratosphere.types.Record;
import eu.stratosphere.types.StringValue;
import eu.stratosphere.util.LogUtils;

public class DelimitedInputFormatTest {
 
  protected Configuration config;
 
  protected File tempFile;
 
  private final DelimitedInputFormat<Record> format = new MyTextInputFormat();
 
  // --------------------------------------------------------------------------------------------
 
  @BeforeClass
  public static void initialize() {
    LogUtils.initializeDefaultConsoleLogger(Level.WARN);
  }
 
  @Before
  public void setup() {
    this.format.setFilePath(new Path("file:///some/file/that/will/not/be/read"));
    this.config = new Configuration();
  }
 
  @After
  public void setdown() throws Exception {
    if (this.format != null) {
      this.format.close();
    }
    if (this.tempFile != null) {
      this.tempFile.delete();
    }
  }

  // --------------------------------------------------------------------------------------------
  // --------------------------------------------------------------------------------------------
  @Test
  public void testConfigure() {
    this.config.setString("delimited-format.delimiter", "\n");
   
    format.configure(this.config);
    assertEquals("\n", new String(format.getDelimiter()));

    this.config.setString("delimited-format.delimiter", "&-&");
    format.configure(this.config);
    assertEquals("&-&", new String(format.getDelimiter()));
  }
 
  @Test
  public void testSerialization() throws Exception {
    final byte[] DELIMITER = new byte[] {1, 2, 3, 4};
    final int NUM_LINE_SAMPLES = 7;
    final int LINE_LENGTH_LIMIT = 12345;
    final int BUFFER_SIZE = 178;
   
    DelimitedInputFormat<Record> format = new MyTextInputFormat();
    format.setDelimiter(DELIMITER);
    format.setNumLineSamples(NUM_LINE_SAMPLES);
    format.setLineLengthLimit(LINE_LENGTH_LIMIT);
    format.setBufferSize(BUFFER_SIZE);
   
    ByteArrayOutputStream baos = new ByteArrayOutputStream(4096);
    ObjectOutputStream oos = new ObjectOutputStream(baos);
    oos.writeObject(format);
    oos.flush();
    oos.close();
   
    ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray()));
    @SuppressWarnings("unchecked")
    DelimitedInputFormat<Record> deserialized = (DelimitedInputFormat<Record>) ois.readObject();
   
    assertEquals(NUM_LINE_SAMPLES, deserialized.getNumLineSamples());
    assertEquals(LINE_LENGTH_LIMIT, deserialized.getLineLengthLimit());
    assertEquals(BUFFER_SIZE, deserialized.getBufferSize());
    assertArrayEquals(DELIMITER, deserialized.getDelimiter());
  }

  @Test
  public void testOpen() throws IOException {
    final String myString = "my mocked line 1\nmy mocked line 2\n";
    final FileInputSplit split = createTempFile(myString)
   
    int bufferSize = 5;
    format.setBufferSize(bufferSize);
    format.open(split);
    assertEquals(0, format.splitStart);
    assertEquals(myString.length() - bufferSize, format.splitLength);
    assertEquals(bufferSize, format.getBufferSize());
  }

  @Test
  public void testRead() throws IOException {
    final String myString = "my key|my val$$$my key2\n$$ctd.$$|my value2";
    final FileInputSplit split = createTempFile(myString);
   
    final Configuration parameters = new Configuration();
   
    format.setDelimiter("$$$");
    format.configure(parameters);
    format.open(split);
   
    Record theRecord = new Record();

    assertNotNull(format.nextRecord(theRecord));
    assertEquals("my key", theRecord.getField(0, StringValue.class).getValue());
    assertEquals("my val", theRecord.getField(1, StringValue.class).getValue());
   
    assertNotNull(format.nextRecord(theRecord));
    assertEquals("my key2\n$$ctd.$$", theRecord.getField(0, StringValue.class).getValue());
    assertEquals("my value2", theRecord.getField(1, StringValue.class).getValue());
   
    assertNull(format.nextRecord(theRecord));
    assertTrue(format.reachedEnd());
  }
 
  @Test
  public void testRead2() throws IOException {
    // 2. test case
    final String myString = "my key|my val$$$my key2\n$$ctd.$$|my value2";
    final FileInputSplit split = createTempFile(myString);
   
    final Configuration parameters = new Configuration();
    // default delimiter = '\n'
   
    format.configure(parameters);
    format.open(split);

    Record theRecord = new Record();

    assertNotNull(format.nextRecord(theRecord));
    assertEquals("my key", theRecord.getField(0, StringValue.class).getValue());
    assertEquals("my val$$$my key2", theRecord.getField(1, StringValue.class).getValue());
   
    assertNotNull(format.nextRecord(theRecord));
    assertEquals("$$ctd.$$", theRecord.getField(0, StringValue.class).getValue());
    assertEquals("my value2", theRecord.getField(1, StringValue.class).getValue());
   
    assertNull(format.nextRecord(theRecord));
    assertTrue(format.reachedEnd());
  }
 
  private FileInputSplit createTempFile(String contents) throws IOException {
    this.tempFile = File.createTempFile("test_contents", "tmp");
    this.tempFile.deleteOnExit();
   
    OutputStreamWriter wrt = new OutputStreamWriter(new FileOutputStream(this.tempFile));
    wrt.write(contents);
    wrt.close();
   
    return new FileInputSplit(0, new Path(this.tempFile.toURI().toString()), 0, this.tempFile.length(), new String[] {"localhost"});
  }
 
  protected static final class MyTextInputFormat extends eu.stratosphere.api.common.io.DelimitedInputFormat<Record> {
    private static final long serialVersionUID = 1L;
   
    private final StringValue str1 = new StringValue();
    private final StringValue str2 = new StringValue();
   
    @Override
    public Record readRecord(Record reuse, byte[] bytes, int offset, int numBytes) {
      String theRecord = new String(bytes, offset, numBytes);
     
      str1.setValue(theRecord.substring(0, theRecord.indexOf('|')));
      str2.setValue(theRecord.substring(theRecord.indexOf('|') + 1));
     
      reuse.setField(0, str1);
      reuse.setField(1, str2);
      return reuse;
    }
  }
}
TOP

Related Classes of eu.stratosphere.api.common.io.DelimitedInputFormatTest$MyTextInputFormat

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.