Examples of InputSplit
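
org.apache.hadoop.mapreduce.InputSplit is the abstract class an InputFormat hands back from getSplits(): each split describes one chunk of input that a single map task will process. Its two methods, getLength() and getLocations(), let the framework size splits and schedule tasks near their data; concrete splits such as FileSplit, HBase's TableSplit, Accumulo's RangeInputSplit, and HCatalog's HCatSplit additionally implement Writable so they can be serialized out to the tasks. A minimal sketch of that contract, using an invented IdRangeSplit purely for illustration (nothing below comes from the projects excerpted on this page):

    import java.io.DataInput;
    import java.io.DataOutput;
    import java.io.IOException;

    import org.apache.hadoop.io.Writable;
    import org.apache.hadoop.mapreduce.InputSplit;

    // Hypothetical split covering a numeric id range served from one host.
    public class IdRangeSplit extends InputSplit implements Writable {
      private long firstId;
      private long lastId;
      private String host;

      public IdRangeSplit() {}  // no-arg constructor required for deserialization

      public IdRangeSplit(long firstId, long lastId, String host) {
        this.firstId = firstId;
        this.lastId = lastId;
        this.host = host;
      }

      @Override
      public long getLength() {          // a size estimate; used to sort splits
        return lastId - firstId;
      }

      @Override
      public String[] getLocations() {   // placement hint, not a guarantee
        return new String[] { host };
      }

      @Override
      public void write(DataOutput out) throws IOException {
        out.writeLong(firstId);
        out.writeLong(lastId);
        out.writeUTF(host);
      }

      @Override
      public void readFields(DataInput in) throws IOException {
        firstId = in.readLong();
        lastId = in.readLong();
        host = in.readUTF();
      }
    }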


Examples of org.apache.hadoop.mapreduce.InputSplit
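
This first fragment looks like a unit test for Accumulo's AccumuloInputFormat (the aif variable): it asserts that getSplits() produced exactly one split, that the split is a RangeInputSplit, and that the split round-trips the username and table name the job was configured with.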

    List<InputSplit> splits = aif.getSplits(job);
    Assert.assertEquals(1, splits.size());

    InputSplit split = splits.get(0);
    Assert.assertEquals(RangeInputSplit.class, split.getClass());

    RangeInputSplit risplit = (RangeInputSplit) split;
    Assert.assertEquals(username, risplit.getUsername());
    Assert.assertEquals(table, risplit.getTable());

Examples of org.apache.hadoop.mapreduce.InputSplit
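
An excerpt from the middle of HBase's TableInputFormat.getSplits() loop: for each region it clamps the region's key range to the scan's start and stop rows, builds a TableSplit (this variant of the constructor also carries the Scan), and logs each split at debug level.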

          byte[] splitStart =
              (startRow.length == 0 || Bytes.compareTo(keys.getFirst()[i],
                  startRow) >= 0) ? keys
                  .getFirst()[i] : startRow;
          byte[] splitStop =
              (stopRow.length == 0 || Bytes.compareTo(keys.getSecond()[i],
                  stopRow) <= 0) && keys.getSecond()[i].length > 0 ? keys
                  .getSecond()[i] : stopRow;
          InputSplit split =
              new TableSplit(tableName, scan, splitStart,
                  splitStop, regionLocation);
          splits.add(split);
          if (LOG.isDebugEnabled())
            LOG.debug("getSplits: split -> " + (count++) + " -> " + split);

Examples of org.apache.hadoop.mapreduce.InputSplit
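
A fuller version of the same method, from HBase's TableInputFormatBase: when the table reports no start keys it returns a single whole-table split pinned to the host of the lone region; otherwise it resolves each region server by reverse DNS (falling back to the server's own hostname), skips regions excluded by includeRegionInSplit(), and intersects the scan's row range with each region's key range.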

      HRegionLocation regLoc = table.getRegionLocation(
          HConstants.EMPTY_BYTE_ARRAY, false);
      if (null == regLoc) {
        throw new IOException("Expecting at least one region.");
      }
      List<InputSplit> splits = new ArrayList<InputSplit>(1);
      InputSplit split = new TableSplit(table.getTableName(),
          HConstants.EMPTY_BYTE_ARRAY, HConstants.EMPTY_BYTE_ARRAY, regLoc
              .getHostnamePort().split(Addressing.HOSTNAME_PORT_SEPARATOR)[0]);
      splits.add(split);
      return splits;
    }
    List<InputSplit> splits = new ArrayList<InputSplit>(keys.getFirst().length);
    for (int i = 0; i < keys.getFirst().length; i++) {
      if ( !includeRegionInSplit(keys.getFirst()[i], keys.getSecond()[i])) {
        continue;
      }
      HServerAddress regionServerAddress =
        table.getRegionLocation(keys.getFirst()[i]).getServerAddress();
      InetAddress regionAddress =
        regionServerAddress.getInetSocketAddress().getAddress();
      String regionLocation;
      try {
        regionLocation = reverseDNS(regionAddress);
      } catch (NamingException e) {
        LOG.error("Cannot resolve the host name for " + regionAddress +
            " because of " + e);
        regionLocation = regionServerAddress.getHostname();
      }

      byte[] startRow = scan.getStartRow();
      byte[] stopRow = scan.getStopRow();
      // determine if the given start and stop keys fall into the region
      if ((startRow.length == 0 || keys.getSecond()[i].length == 0 ||
           Bytes.compareTo(startRow, keys.getSecond()[i]) < 0) &&
          (stopRow.length == 0 ||
           Bytes.compareTo(stopRow, keys.getFirst()[i]) > 0)) {
        byte[] splitStart = startRow.length == 0 ||
          Bytes.compareTo(keys.getFirst()[i], startRow) >= 0 ?
            keys.getFirst()[i] : startRow;
        byte[] splitStop = (stopRow.length == 0 ||
          Bytes.compareTo(keys.getSecond()[i], stopRow) <= 0) &&
          keys.getSecond()[i].length > 0 ?
            keys.getSecond()[i] : stopRow;
        InputSplit split = new TableSplit(table.getTableName(),
          splitStart, splitStop, regionLocation);
        splits.add(split);
        if (LOG.isDebugEnabled()) {
          LOG.debug("getSplits: split -> " + i + " -> " + split);
        }
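
Each split's row range is thus the intersection of the scan's [startRow, stopRow) range with the region's [startKey, endKey) range, with an empty key meaning unbounded, so no split ever crosses a region boundary.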

Examples of org.apache.hadoop.mapreduce.InputSplit
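
HCatalog wraps the storage driver's split inside its own HCatSplit, so deserializing one means first recovering the concrete class of the wrapped split by name, then reading its fields. This fragment is from HCatSplit.readFields():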

    public void readFields(DataInput input) throws IOException {
        String partitionInfoString = WritableUtils.readString(input);
        partitionInfo = (PartInfo) HCatUtil.deserialize(partitionInfoString);

        String baseSplitClassName = WritableUtils.readString(input);
        InputSplit split;
        try{
            Class<? extends InputSplit> splitClass =
                (Class<? extends InputSplit>) Class.forName(baseSplitClassName);

            //Class.forName().newInstance() does not work if the underlying
            //InputSplit has package visibility

Examples of org.apache.hadoop.mapreduce.InputSplit
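
The matching record reader unwraps an HCatSplit back to the storage handler's own split before initializing the underlying reader: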

    /**
     * @see org.apache.hadoop.mapreduce.RecordReader#initialize(org.apache.hadoop.mapreduce.InputSplit, org.apache.hadoop.mapreduce.TaskAttemptContext)
     */
    @Override
    public void initialize(InputSplit split, TaskAttemptContext taskContext)
    throws IOException, InterruptedException {
        InputSplit baseSplit = split;

        if( split instanceof HCatSplit ) {
            baseSplit = ((HCatSplit) split).getBaseSplit();
        }


Examples of org.apache.hadoop.mapreduce.InputSplit
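
Another copy of the TableInputFormat.getSplits() loop, from a version whose TableSplit constructor takes only the table name, row range, and location, and which numbers the splits in the debug log with a running counter: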

        byte[] splitStart = startRow.length == 0 ||
          Bytes.compareTo(keys.getFirst()[i], startRow) >= 0 ?
            keys.getFirst()[i] : startRow;
        byte[] splitStop = (stopRow.length == 0 ||
          Bytes.compareTo(keys.getSecond()[i], stopRow) <= 0) &&
          keys.getSecond()[i].length > 0 ?
            keys.getSecond()[i] : stopRow;
        InputSplit split = new TableSplit(table.getTableName(),
          splitStart, splitStop, regionLocation);
        splits.add(split);
        if (LOG.isDebugEnabled())
          LOG.debug("getSplits: split -> " + (count++) + " -> " + split);
      }

Examples of org.apache.hadoop.mapreduce.InputSplit
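
The next three fragments appear to come from HCatalog's RCFileInputDriver tests. Each points the driver at an RCFile, asks the underlying InputFormat for its first split, and sets input and output schemas before initializing a reader over that split. In this first variant both schemas are the same full Hive schema: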

    HCatSchema schema = buildHiveSchema();
    RCFileInputDriver sd = new RCFileInputDriver();
    JobContext jc = new JobContext(conf, new JobID());
    sd.setInputPath(jc, file.toString());
    InputFormat<?,?> iF = sd.getInputFormat(null);
    InputSplit split = iF.getSplits(jc).get(0);
    sd.setOriginalSchema(jc, schema);
    sd.setOutputSchema(jc, schema);
    sd.initialize(jc, getProps());

    TaskAttemptContext tac = new TaskAttemptContext(conf, new TaskAttemptID());

Examples of org.apache.hadoop.mapreduce.InputSplit
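
The same setup with a pruned output schema; the column-projection ids the driver computed are copied back into the test's Configuration: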

    RCFileInputDriver sd = new RCFileInputDriver();
    JobContext jc = new JobContext(conf, new JobID());
    sd.setInputPath(jc, file.toString());
    InputFormat<?,?> iF = sd.getInputFormat(null);
    InputSplit split = iF.getSplits(jc).get(0);
    sd.setOriginalSchema(jc, buildHiveSchema());
    sd.setOutputSchema(jc, buildPrunedSchema());

    sd.initialize(jc, getProps());
    conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR,
        jc.getConfiguration().get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR));

Examples of org.apache.hadoop.mapreduce.InputSplit
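
And once more with the output columns reordered relative to the on-disk schema: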

    RCFileInputDriver sd = new RCFileInputDriver();
    JobContext jc = new JobContext(conf, new JobID());
    sd.setInputPath(jc, file.toString());
    InputFormat<?,?> iF = sd.getInputFormat(null);
    InputSplit split = iF.getSplits(jc).get(0);
    sd.setOriginalSchema(jc, buildHiveSchema());
    sd.setOutputSchema(jc, buildReorderedSchema());

    sd.initialize(jc, getProps());
    Map<String,String> map = new HashMap<String,String>(1);

Examples of org.apache.hadoop.mapreduce.InputSplit
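
The final fragment checks split sizing and placement, apparently for a CombineFileInputFormat-style test: the expected split count is the total input length divided by the maximum split size, every split should be exactly maxSize long with a single location, and the splits should land three apiece on the first two hosts in the test's locations array.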

   
    int expectedSplitCount = (int)(totLength/maxSize);
    Assert.assertEquals(expectedSplitCount, splits.size());
    HashMultiset<String> nodeSplits = HashMultiset.create();
    for(int i=0; i<expectedSplitCount; ++i) {
      InputSplit inSplit = splits.get(i);
      Assert.assertEquals(maxSize, inSplit.getLength());
      Assert.assertEquals(1, inSplit.getLocations().length);
      nodeSplits.add(inSplit.getLocations()[0]);
    }
    Assert.assertEquals(3, nodeSplits.count(locations[0]));
    Assert.assertEquals(3, nodeSplits.count(locations[1]));
  }