Package org.apache.hadoop.io.SequenceFile

Examples of org.apache.hadoop.io.SequenceFile.Writer
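SequenceFile.Writer appends binary key/value records (Hadoop Writables) to a flat file, optionally compressing them per record or per block. As a minimal, self-contained sketch of the classic FileSystem-based factory used throughout the examples below (the path and key/value types are illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SequenceFileWriterExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path("/tmp/example.seq"); // illustrative path
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, path,
        Text.class, IntWritable.class);
    try {
      writer.append(new Text("key"), new IntWritable(42));
    } finally {
      writer.close(); // flushes buffered records and the final sync marker
    }
  }
}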


     
      if (numSpills == 0) {
        //create dummy files
        for (int i = 0; i < partitions; i++) {
          segmentStart = finalOut.getPos();
          Writer writer = SequenceFile.createWriter(job, finalOut,
                                                    job.getMapOutputKeyClass(),
                                                    job.getMapOutputValueClass(),
                                                    compressionType, codec);
          finalIndexOut.writeLong(segmentStart);
          finalIndexOut.writeLong(finalOut.getPos() - segmentStart);
          writer.close();
        }
        finalOut.close();
        finalIndexOut.close();
        return;
      }
      {
        //create a sorter object as we need access to the SegmentDescriptor
        //class and merge methods
        Sorter sorter = new Sorter(localFs, job.getOutputKeyComparator(),
                                   keyClass, valClass, job);
        sorter.setProgressable(reporter);
       
        for (int parts = 0; parts < partitions; parts++){
          List<SegmentDescriptor> segmentList =
            new ArrayList<SegmentDescriptor>(numSpills);
          for(int i = 0; i < numSpills; i++) {
            FSDataInputStream indexIn = localFs.open(indexFileName[i]);
            indexIn.seek(parts * 16);
            long segmentOffset = indexIn.readLong();
            long segmentLength = indexIn.readLong();
            indexIn.close();
            SegmentDescriptor s = sorter.new SegmentDescriptor(segmentOffset,
                                                               segmentLength, filename[i]);
            s.preserveInput(true);
            s.doSync();
            segmentList.add(i, s);
          }
          segmentStart = finalOut.getPos();
          RawKeyValueIterator kvIter = sorter.merge(segmentList, new Path(getTaskId()));
          SequenceFile.Writer writer = SequenceFile.createWriter(job, finalOut,
                                                                 job.getMapOutputKeyClass(), job.getMapOutputValueClass(),
                                                                 compressionType, codec);
          sorter.writeFile(kvIter, writer);
          //close the file - required esp. for block compression to ensure
          //partition data don't span partition boundaries
          writer.close();
          //when we write the offset/length to the final index file, we write
          //longs for both. This lets us reliably seek directly to the
          //offset/length for a partition when we start serving the byte-ranges
          //to the reduces. We probably waste some space in the file by doing
          //this as opposed to writing VLongs, but it simplifies things later on.
          finalIndexOut.writeLong(segmentStart);
          finalIndexOut.writeLong(finalOut.getPos() - segmentStart);
        }
        finalOut.close();
        finalIndexOut.close();
      }
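Each index entry above is two raw longs (16 bytes), so a reader can seek straight to entry i at byte offset i * 16, exactly as the merge loop does. A small sketch of reading one entry back; indexPath and partition are illustrative names:

FSDataInputStream indexIn = localFs.open(indexPath);
indexIn.seek(partition * 16L); // 2 longs * 8 bytes per index entry
long segmentOffset = indexIn.readLong();
long segmentLength = indexIn.readLong();
indexIn.close();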


  /**
   * Finds the cluster directory for the given vector and writes the point
   * into that cluster.
   */
  private void putVectorInRespectiveCluster(String clusterId, WeightedVectorWritable point) throws IOException {
    Writer writer = findWriterForVector(clusterId);
    postProcessedClusterDirectories.put(clusterId,
                                        PathDirectory.getClusterPathForClusterId(clusterPostProcessorOutput, clusterId));
    writeVectorToCluster(writer, point);
  }

  /**
   * Finds (or lazily creates) the writer for the cluster the point should be
   * written to.
   */
  private Writer findWriterForVector(String clusterId) throws IOException {
    Path clusterDirectory = PathDirectory.getClusterPathForClusterId(clusterPostProcessorOutput, clusterId);
    Writer writer = writersForClusters.get(clusterId);
    if (writer == null) {
      Path pathToWrite = new Path(clusterDirectory, new Path("part-m-0"));
      writer = new Writer(fileSystem, conf, pathToWrite, LongWritable.class, VectorWritable.class);
      writersForClusters.put(clusterId, writer);
    }
    return writer;
  }
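Because findWriterForVector caches one open Writer per cluster, the caller must close them all once every point has been written. A minimal sketch; closeWriters is a hypothetical helper, not part of the original class:

private void closeWriters() throws IOException {
  // close each cached per-cluster writer so buffered records are flushed
  for (Writer writer : writersForClusters.values()) {
    writer.close();
  }
}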

    if (fs.exists(in))
      fs.delete(in, true);

    final NullWritable value = NullWritable.get();

    Writer centerWriter = new SequenceFile.Writer(fs, conf, center,
        VectorWritable.class, NullWritable.class);

    final SequenceFile.Writer dataWriter = SequenceFile.createWriter(fs, conf,
        in, VectorWritable.class, NullWritable.class, CompressionType.NONE);

    int i = 0;

    BufferedReader br = new BufferedReader(
        new InputStreamReader(fs.open(txtIn)));
    String line;
    while ((line = br.readLine()) != null) {
      String[] split = line.split("\t");
      int columnLength = split.length;
      int indexPos = 0;
      if (hasKey) {
        columnLength = columnLength - 1;
        indexPos++;
      }

      DenseDoubleVector vec = new DenseDoubleVector(columnLength);
      for (int j = 0; j < columnLength; j++) {
        vec.set(j, Double.parseDouble(split[j + indexPos]));
      }

      VectorWritable vector;
      if (hasKey) {
        NamedDoubleVector named = new NamedDoubleVector(split[0], vec);
        vector = new VectorWritable(named);
      } else {
        vector = new VectorWritable(vec);
      }

      dataWriter.append(vector, value);
      // seed the centers file with the first k vectors
      if (k > i) {
        centerWriter.append(vector, value);
      }
      i++;
    }
    br.close();
    centerWriter.close();
    dataWriter.close();
    return in;
  }
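For reference, the loop above expects tab-separated input lines, with an optional leading name column when hasKey is true; the values below are illustrative:

key1	0.5	1.25	3.0
key2	0.1	0.75	2.5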

      currentFile = 0;
    }
    Path path = new Path(_path, buffer(currentFile + 1));
    LOG.info("Creating new snapshot file [{0}]", path);
    FSDataOutputStream outputStream = fileSystem.create(path, false);
    Writer writer = SequenceFile.createWriter(_configuration, outputStream, Text.class, LongWritable.class,
        CompressionType.NONE, null);
    for (Entry<String, Long> e : _namesToGenerations.entrySet()) {
      writer.append(new Text(e.getKey()), new LongWritable(e.getValue()));
    }
    writer.close();
    outputStream.close();
    cleanupOldFiles();
  }
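A sketch of reading such a snapshot back, mirroring the Text/LongWritable schema written above; the map and variable names are illustrative:

SequenceFile.Reader reader = new SequenceFile.Reader(fileSystem, path, _configuration);
Map<String, Long> namesToGenerations = new HashMap<String, Long>();
Text name = new Text();
LongWritable generation = new LongWritable();
while (reader.next(name, generation)) {
  namesToGenerations.put(name.toString(), generation.get());
}
reader.close();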

        Path dest = new Path(fullyQualifiedFileName + ".copy");
        log.debug("Copying log file to DSF " + dest);
        fs.delete(dest, true);
        LogFileKey key = new LogFileKey();
        LogFileValue value = new LogFileValue();
        Writer writer = null;
        Reader reader = null;
        try {
          short replication = (short) acuConf.getCount(Property.LOGGER_RECOVERY_FILE_REPLICATION);
          writer = SequenceFile.createWriter(fs, fs.getConf(), dest, LogFileKey.class, LogFileValue.class, fs.getConf().getInt("io.file.buffer.size", 4096),
              replication, fs.getDefaultBlockSize(), SequenceFile.CompressionType.BLOCK, new DefaultCodec(), null, new Metadata());
          FileSystem local = TraceFileSystem.wrap(FileSystem.getLocal(fs.getConf()).getRaw());
          reader = new SequenceFile.Reader(local, new Path(findLocalFilename(localLog)), fs.getConf());
          while (reader.next(key, value)) {
            writer.append(key, value);
          }
        } catch (IOException ex) {
          log.warn("May have a partial copy of a recovery file: " + localLog, ex);
        } finally {
          if (reader != null)
            reader.close();
          if (writer != null)
            writer.close();
        }
        // Make file appear in the shared file system as the target name only after it is completely copied
        fs.rename(dest, new Path(fullyQualifiedFileName));
        log.info("Copying " + localLog + " complete");
      }
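Note the copy-then-rename pattern: the recovery data is written to a temporary ".copy" path and renamed to its final name only after the copy completes, so readers of the shared file system never observe a partially written file (the rename itself is atomic in HDFS).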

    Path path = new Path(pathString);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(path.toUri(), conf);
    Class<LongWritable> kClass = LongWritable.class;
    Class<Text> vClass = Text.class;
    Writer writer = new SequenceFile.Writer(fs, conf, path, kClass, vClass);
    int numOfLines = strings.length;
    Random random = new Random(System.currentTimeMillis());
    for (int i = 0; i < num; i++) {
      LongWritable key = new LongWritable(i);
      Text value = new Text(strings[random.nextInt(numOfLines)]);
      writer.append(key, value);
    }
    writer.close();
  }

    // store the output in a sequence file
    Path base = getTestTempDirPath("testdata");
    FileSystem fs = base.getFileSystem(conf);

    Path outputFile = new Path(base, "PartialBuilderTest.seq");
    Writer writer = SequenceFile.createWriter(fs, conf, outputFile,
        TreeID.class, MapredOutput.class);

    for (int index = 0; index < NUM_TREES; index++) {
      writer.append(keys[index], values[index]);
    }
    writer.close();

    // load the output and make sure it's valid
    TreeID[] newKeys = new TreeID[NUM_TREES];
    Node[] newTrees = new Node[NUM_TREES];
   
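The verification step is truncated here. As a sketch of one way to read the pairs back for the assertions that presumably follow (treating MapredOutput's getTree() accessor as an assumption, since it is not shown in this snippet):

SequenceFile.Reader reader = new SequenceFile.Reader(fs, outputFile, conf);
for (int index = 0; index < NUM_TREES; index++) {
  TreeID key = new TreeID();
  MapredOutput value = new MapredOutput();
  reader.next(key, value);
  newKeys[index] = key;
  newTrees[index] = value.getTree(); // assumed accessor for the stored node
}
reader.close();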

    FileSystem fs = base.getFileSystem(conf);
    if (fs.exists(base))
      fs.delete(base, true);

    Path outputFile = new Path(base, "PartialBuilderTest.seq");
    Writer writer = SequenceFile.createWriter(fs, conf, outputFile,
        TreeID.class, MapredOutput.class);

    for (int index = 0; index < numTrees; index++) {
      writer.append(keys[index], values[index]);
    }
    writer.close();

    // load the output and make sure it's valid
    TreeID[] newKeys = new TreeID[numTrees];
    Node[] newTrees = new Node[numTrees];
   

     
