Examples of IntWritable


Examples of org.apache.hadoop.io.IntWritable

  }
 
  @Test
  public void testDedupByIntWritableKeyWithSequenceFileInputFormat() throws Exception {
    HashMap<IntWritable, Text> inputData1 = new HashMap<IntWritable, Text>();
    inputData1.put(new IntWritable(1), new Text("Xavier Wilson,Mason Holloway,Carlos Johnston,Martin Noel,Drake Mckinney"));
    inputData1.put(new IntWritable(2), new Text("Kennedy Bailey,Jerome Perry,David Cabrera,Edan Fleming,Orlando Tyson"));
    inputData1.put(new IntWritable(3), new Text("Drake Mckinney,Murphy Baird,Theodore Lindsey,Nehru Wilcox,Harper Klein"));
    createSequenceFileInHdfs(inputData1, "/input1", "testFile1.seq");
   
    HashMap<IntWritable, Text> inputData2 = new HashMap<IntWritable, Text>();
    inputData2.put(new IntWritable(1), new Text("Zephania Bauer,Jermaine Gordon,Vincent Moon,Steven Pierce,Jasper Campos"));
    inputData2.put(new IntWritable(2), new Text("Kennedy Bailey,Plato Atkinson,Stuart Guy,Rooney Levy,Judah Benson"));
    inputData2.put(new IntWritable(4), new Text("Drake Mckinney,Murphy Baird,Theodore Lindsey,Nehru Wilcox,Harper Klein"));
    createSequenceFileInHdfs(inputData2, "/input2", "testFile2.seq");
   
    String[] args = new String[] {
        "-inputFormat", "org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat",
        "-inputPath", "/input1,/input2",
        "-outputPath", "output",
        "-inputKeyClassName", "org.apache.hadoop.io.IntWritable",
        "-inputValueClassName", "org.apache.hadoop.io.Text",
        "-dedupBy", "key" };
    DedupJob job = runDedupJob(args);
    assertEquals(6, job.getTotalRecordsRead());
    assertEquals(0, job.getBadRecords());
    assertEquals(4, job.getOutput());
    assertEquals(2, job.getDuplicateRecords());
   
   
    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output/part-r-00000");
    Configuration conf = new Configuration();
    SequenceFile.Reader reader = new SequenceFile.Reader(outputFS, outputPath, conf);
    Writable writableKey = (Writable)
    ReflectionUtils.newInstance(reader.getKeyClass(), conf);
    Writable writableValue = (Writable)
    ReflectionUtils.newInstance(reader.getValueClass(), conf);
    List<IntWritable> expectedOutput = new ArrayList<IntWritable>();
    expectedOutput.add(new IntWritable(1));
    expectedOutput.add(new IntWritable(2));
    expectedOutput.add(new IntWritable(3));
    expectedOutput.add(new IntWritable(4));
    int count = 0;
    while (reader.next(writableKey, writableValue)) {
      logger.debug("key and value is: " + writableKey + ", " + writableValue);
      assertTrue("Matched output " + writableKey , expectedOutput.contains(writableKey));
      count++;
View Full Code Here

Examples of org.apache.hadoop.io.IntWritable

  }
 
  @Test
  public void testDedupByValueWithSequenceFileInputFormat() throws Exception {
    HashMap<IntWritable, Text> inputData1 = new HashMap<IntWritable, Text>();
    inputData1.put(new IntWritable(1), new Text("Xavier Wilson,Mason Holloway,Carlos Johnston,Martin Noel,Drake Mckinney"));
    inputData1.put(new IntWritable(2), new Text("Kennedy Bailey,Jerome Perry,David Cabrera,Edan Fleming,Orlando Tyson"));
    inputData1.put(new IntWritable(3), new Text("Drake Mckinney,Murphy Baird,Theodore Lindsey,Nehru Wilcox,Harper Klein"));
    createSequenceFileInHdfs(inputData1, "/input1", "testFile1.seq");
   
    HashMap<IntWritable, Text> inputData2 = new HashMap<IntWritable, Text>();
    inputData2.put(new IntWritable(1), new Text("Zephania Bauer,Jermaine Gordon,Vincent Moon,Steven Pierce,Jasper Campos"));
    inputData2.put(new IntWritable(2), new Text("Kennedy Bailey,Plato Atkinson,Stuart Guy,Rooney Levy,Judah Benson"));
    inputData2.put(new IntWritable(4), new Text("Drake Mckinney,Murphy Baird,Theodore Lindsey,Nehru Wilcox,Harper Klein"));
    createSequenceFileInHdfs(inputData2, "/input2", "testFile2.seq");
   
    String[] args = new String[] {
        "-inputFormat", "org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat",
        "-inputPath", "/input1,/input2",
View Full Code Here

Examples of org.apache.hadoop.io.IntWritable

  }
 
 
  @Test
  public void testDedupByCustomObjectKeyWithSequenceFileInputFormat() throws Exception {   
    Student student1 = setStudent(new Text("Sam"),new Text("US"),new IntWritable(1),
        new LongWritable(9999999998l),new DoubleWritable(99.12));       
    Student student2 = setStudent(new Text("John"),new Text("AUS"),new IntWritable(2),
        new LongWritable(9999999999l),new DoubleWritable(90.12));       
    Student student3 = setStudent(new Text("Mary"),new Text("UK"),new IntWritable(3),
        new LongWritable(9999999988l),new DoubleWritable(69.12));   
    Student student4 = setStudent(new Text("Kelvin"),new Text("UK"),new IntWritable(4),
        new LongWritable(9999998888l),new DoubleWritable(59.12));
 
    HashMap<Student, Text> inputData1 = new HashMap<Student, Text>();
    inputData1.put(student1, new Text("Macon Kent,6269 Aenean St.,1-247-399-1051,08253"));
    inputData1.put(student2, new Text("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"));
View Full Code Here

Examples of org.apache.hadoop.io.IntWritable

    return percentage;
  }

  @Override
  public void readFields(DataInput in) throws IOException {
    id = new IntWritable();
    id.readFields(in);
    name = new Text();
    name.readFields(in);
    address = new Text();
    address.readFields(in);
View Full Code Here

Examples of org.apache.hadoop.io.IntWritable

  public static void main(String[] args) throws IOException {
    String uri = "input2.seq";
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(uri), conf);
    Path path = new Path(uri);
    IntWritable key = new IntWritable();
    Text value = new Text();
    SequenceFile.Writer writer = null;
    try {
      writer = SequenceFile.createWriter(fs, conf, path, key.getClass(),
          value.getClass());
      for (int i = 0; i < 2; i++) {
        key.set(2 - i);
        value.set(DATA[i % DATA.length]);
        System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key,
            value);
        writer.append(key, value);
      }
View Full Code Here

Examples of org.apache.hadoop.io.IntWritable

  }

  @Test
  public void testReducerForIntWritableKeyAndValue() throws IOException,
      InterruptedException {
    IntWritable key = new IntWritable(123);
    HihoTuple hihoTuple = new HihoTuple();
    hihoTuple.setKey(key);

    HihoValue hihoValue1 = new HihoValue();
    HihoValue hihoValue2 = new HihoValue();
    IntWritable value1 = new IntWritable(456);
    IntWritable value2 = new IntWritable(789);
    hihoValue1.setVal(value1);
    hihoValue2.setVal(value2);
    hihoValue1.setIsOld(true);
    hihoValue2.setIsOld(false);
    ArrayList<HihoValue> values = new ArrayList<HihoValue>();
View Full Code Here

Examples of org.apache.hadoop.io.IntWritable

  }

  @Test
  public void testReducerForIntWritableKeyAndValue() throws IOException,
      InterruptedException {
    IntWritable key = new IntWritable(123);
    HihoTuple hihoTuple = new HihoTuple();
    hihoTuple.setKey(key);

    HihoValue hihoValue1 = new HihoValue();
    HihoValue hihoValue2 = new HihoValue();
    IntWritable value1 = new IntWritable(456);
    IntWritable value2 = new IntWritable(789);
    hihoValue1.setVal(value1);
    hihoValue2.setVal(value2);
    hihoValue1.setIsOld(true);
    hihoValue2.setIsOld(false);
    ArrayList<HihoValue> values = new ArrayList<HihoValue>();
View Full Code Here

Examples of org.apache.hadoop.io.IntWritable

        for(int i = 0; i < billNames2D.size(); i++){
          // if any of the bill's name is mentioned at least once, count as one mention
          for(String billNickname : billNames2D.get(i)){
            if(pageText.indexOf(billNickname) >= 0){
              //emit domain
              output.collect(new IntWritable(i), new Text("d"+domain));

              //emit uncommon words from this page and associate them with bill
              for (String word : pageText.split(" ")) {
                word = word.toLowerCase().trim();
                if(commonWords.indexOf(word) == -1){
                  output.collect(new IntWritable(i), new Text("w"+word));
                }
              }
              break;
            }
          }
View Full Code Here

Examples of org.apache.hadoop.io.IntWritable

      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path,
          conf);
      SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, new Path(path.toString()+"-dict"),
        Text.class, IntWritable.class);
      Text key = new Text();
      IntWritable value = new IntWritable();
      while (reader.next(key, value)) {
        dictionary.put(key.toString(), Integer.valueOf(i++));
        writer.append(key, new IntWritable(i-1));
      }
      writer.close();
    }
    DefaultStringifier<Map<String,Integer>> mapStringifier = new DefaultStringifier<Map<String,Integer>>(
        conf, GenericsUtil.getClass(dictionary));
View Full Code Here

Examples of org.apache.hadoop.io.IntWritable

    if (fields.length < 4) {
      context.getCounter("Map", "LinesWithErrors").increment(1);
      return;
    }
    String artist = fields[1];
    context.write(new Text(artist), new IntWritable(0));
  }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.