Package org.apache.tez.dag.api

Examples of org.apache.tez.dag.api.TezUncheckedException


        String mode = "map compatability";
        ensureNotSet(conf, MRJobConfig.INPUT_FORMAT_CLASS_ATTR, mode);
        ensureNotSet(conf, MRJobConfig.MAP_CLASS_ATTR, mode);
      }
    } catch (IOException e) {
      throw new TezUncheckedException(e);
    }
  }
View Full Code Here


      Class<? extends InputFormat> clazz = (Class<? extends InputFormat>)
          getClassFromName(split.wrappedInputFormatName);
      try {
        wrappedInputFormat = org.apache.hadoop.util.ReflectionUtils.newInstance(clazz, conf);
      } catch (Exception e) {
        throw new TezUncheckedException(e);
      }
    }
  }
View Full Code Here

      Class<? extends InputFormat> clazz = (Class<? extends InputFormat>)
          getClassFromName(split.wrappedInputFormatName);
      try {
        wrappedInputFormat = org.apache.hadoop.util.ReflectionUtils.newInstance(clazz, conf);
      } catch (Exception e) {
        throw new TezUncheckedException(e);
      }
    }
  }
View Full Code Here

  public void addSplit(InputSplit split) {
    wrappedSplits.add(split);
    try {
      length += split.getLength();
    } catch (Exception e) {
      throw new TezUncheckedException(e);
    }
  }
View Full Code Here

  }
 
  @Override
  public void write(DataOutput out) throws IOException {
    if (wrappedSplits == null) {
      throw new TezUncheckedException("Wrapped splits cannot be empty");
    }

    Text.writeString(out, wrappedInputFormatName);
    Text.writeString(out, wrappedSplits.get(0).getClass().getName());
    out.writeInt(wrappedSplits.size());
View Full Code Here

      addSplit(readWrappedSplit(in, clazz));
    }
   
    long recordedLength = in.readLong();
    if(recordedLength != length) {
      throw new TezUncheckedException("Expected length: " + recordedLength
          + " actual length: " + length);
    }
    int numLocs = in.readInt();
    if (numLocs > 0) {
      locations = new String[numLocs];
View Full Code Here

      throws IOException {
    InputSplit split;
    try {
      split = ReflectionUtils.newInstance(clazz, conf);
    } catch (Exception e) {
      throw new TezUncheckedException(e);
    }
    split.readFields(in);
    return split;
  }
View Full Code Here

    try {
      inputDescriptor = InputDescriptor.create(useLegacyInput ? MRInputLegacy.class
          .getName() : MRInput.class.getName())
          .setUserPayload(MRInputHelpersInternal.createMRInputPayload(conf, null));
    } catch (IOException e) {
      throw new TezUncheckedException(e);
    }

    DataSourceDescriptor dsd = DataSourceDescriptor.create(inputDescriptor, null, null);
    return dsd;
  }
View Full Code Here

      long minLengthPerGroup = conf.getLong(
          TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_MIN_SIZE,
          TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_MIN_SIZE_DEFAULT);
      if (maxLengthPerGroup < minLengthPerGroup ||
          minLengthPerGroup <=0) {
        throw new TezUncheckedException(
          "Invalid max/min group lengths. Required min>0, max>=min. " +
          " max: " + maxLengthPerGroup + " min: " + minLengthPerGroup);
      }
      if (lengthPerGroup > maxLengthPerGroup) {
        // splits too big to work. Need to override with max size.
        int newDesiredNumSplits = (int)(totalLength/maxLengthPerGroup) + 1;
        LOG.info("Desired splits: " + desiredNumSplits + " too small. " +
            " Desired splitLength: " + lengthPerGroup +
            " Max splitLength: " + maxLengthPerGroup +
            " New desired splits: " + newDesiredNumSplits +
            " Total length: " + totalLength +
            " Original splits: " + originalSplits.length);
       
        desiredNumSplits = newDesiredNumSplits;
      } else if (lengthPerGroup < minLengthPerGroup) {
        // splits too small to work. Need to override with size.
        int newDesiredNumSplits = (int)(totalLength/minLengthPerGroup) + 1;
        LOG.info("Desired splits: " + desiredNumSplits + " too large. " +
            " Desired splitLength: " + lengthPerGroup +
            " Min splitLength: " + minLengthPerGroup +
            " New desired splits: " + newDesiredNumSplits +
            " Total length: " + totalLength +
            " Original splits: " + originalSplits.length);
       
        desiredNumSplits = newDesiredNumSplits;
      }
    }
   
    if (originalSplits == null) {
      LOG.info("Null original splits");
      return null;
    }
   
    if (desiredNumSplits == 0 ||
        originalSplits.length == 0 ||
        desiredNumSplits >= originalSplits.length) {
      // nothing set. so return all the splits as is
      LOG.info("Using original number of splits: " + originalSplits.length +
          " desired splits: " + desiredNumSplits);
      InputSplit[] groupedSplits = new TezGroupedSplit[originalSplits.length];
      int i=0;
      for (InputSplit split : originalSplits) {
        TezGroupedSplit newSplit =
            new TezGroupedSplit(1, wrappedInputFormatName, split.getLocations());
        newSplit.addSplit(split);
        groupedSplits[i++] = newSplit;
      }
      return groupedSplits;
    }
   
    String emptyLocation = "EmptyLocation";
    String[] emptyLocations = {emptyLocation};
    List<InputSplit> groupedSplitsList = new ArrayList<InputSplit>(desiredNumSplits);
   
    long totalLength = 0;
    Map<String, LocationHolder> distinctLocations = new HashMap<String, LocationHolder>();
    // go through splits and add them to locations
    for (InputSplit split : originalSplits) {
      totalLength += split.getLength();
      String[] locations = split.getLocations();
      if (locations == null || locations.length == 0) {
        locations = emptyLocations;
      }
      for (String location : locations ) {
        if (location == null) {
          location = emptyLocation;
        }
        distinctLocations.put(location, null);
      }
    }
   
    long lengthPerGroup = totalLength/desiredNumSplits;
    int numNodeLocations = distinctLocations.size();
    int numSplitsPerLocation = originalSplits.length/numNodeLocations;
    int numSplitsInGroup = originalSplits.length/desiredNumSplits;

    // allocation loop here so that we have a good initial size for the lists
    for (String location : distinctLocations.keySet()) {
      distinctLocations.put(location, new LocationHolder(numSplitsPerLocation+1));
    }
   
    Set<String> locSet = new HashSet<String>();
    for (InputSplit split : originalSplits) {
      locSet.clear();
      SplitHolder splitHolder = new SplitHolder(split);
      String[] locations = split.getLocations();
      if (locations == null || locations.length == 0) {
        locations = emptyLocations;
      }
      for (String location : locations) {
        if (location == null) {
          location = emptyLocation;
        }
        locSet.add(location);
      }
      for (String location : locSet) {
        LocationHolder holder = distinctLocations.get(location);
        holder.splits.add(splitHolder);
      }
    }
   
    boolean groupByLength = conf.getBoolean(
        TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_BY_LENGTH,
        TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_BY_LENGTH_DEFAULT);
    boolean groupByCount = conf.getBoolean(
        TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_BY_COUNT,
        TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_BY_COUNT_DEFAULT);
    if (!(groupByLength || groupByCount)) {
      throw new TezUncheckedException(
          "None of the grouping parameters are true: "
              + TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_BY_LENGTH + ", "
              + TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_BY_COUNT);
    }
    LOG.info("Desired numSplits: " + desiredNumSplits +
        " lengthPerGroup: " + lengthPerGroup +
        " numLocations: " + numNodeLocations +
        " numSplitsPerLocation: " + numSplitsPerLocation +
        " numSplitsInGroup: " + numSplitsInGroup +
        " totalLength: " + totalLength +
        " numOriginalSplits: " + originalSplits.length +
        " . Grouping by length: " + groupByLength + " count: " + groupByCount);
   
    // go through locations and group splits
    int splitsProcessed = 0;
    List<SplitHolder> group = new ArrayList<SplitHolder>(numSplitsInGroup+1);
    Set<String> groupLocationSet = new HashSet<String>(10);
    boolean allowSmallGroups = false;
    boolean doingRackLocal = false;
    int iterations = 0;
    while (splitsProcessed < originalSplits.length) {
      iterations++;
      int numFullGroupsCreated = 0;
      for (Map.Entry<String, LocationHolder> entry : distinctLocations.entrySet()) {
        group.clear();
        groupLocationSet.clear();
        String location = entry.getKey();
        LocationHolder holder = entry.getValue();
        SplitHolder splitHolder = holder.getUnprocessedHeadSplit();
        if (splitHolder == null) {
          // all splits on node processed
          continue;
        }
        int oldHeadIndex = holder.headIndex;
        long groupLength = 0;
        int groupNumSplits = 0;
        do {
          group.add(splitHolder);
          groupLength += splitHolder.split.getLength();
          groupNumSplits++;
          holder.incrementHeadIndex();
          splitHolder = holder.getUnprocessedHeadSplit();
        } while(splitHolder != null 
            && (!groupByLength ||
                (groupLength + splitHolder.split.getLength() <= lengthPerGroup))
            && (!groupByCount ||
                (groupNumSplits + 1 <= numSplitsInGroup)));

        if (holder.isEmpty()
            && !allowSmallGroups
            && (!groupByLength || groupLength < lengthPerGroup/2)
            && (!groupByCount || groupNumSplits < numSplitsInGroup/2)) {
          // group too small, reset it
          holder.headIndex = oldHeadIndex;
          continue;
        }
       
        numFullGroupsCreated++;

        // One split group created
        String[] groupLocation = {location};
        if (location == emptyLocation) {
          groupLocation = null;
        } else if (doingRackLocal) {
          for (SplitHolder splitH : group) {
            String[] locations = splitH.split.getLocations();
            if (locations != null) {
              for (String loc : locations) {
                if (loc != null) {
                  groupLocationSet.add(loc);
                }
              }
            }
          }
          groupLocation = groupLocationSet.toArray(groupLocation);
        }
        TezGroupedSplit groupedSplit =
            new TezGroupedSplit(group.size(), wrappedInputFormatName,
                groupLocation,
                // pass rack local hint directly to AM
                ((doingRackLocal && location != emptyLocation)?location:null));
        for (SplitHolder groupedSplitHolder : group) {
          groupedSplit.addSplit(groupedSplitHolder.split);
          Preconditions.checkState(groupedSplitHolder.isProcessed == false,
              "Duplicates in grouping at location: " + location);
          groupedSplitHolder.isProcessed = true;
          splitsProcessed++;
        }
        if (LOG.isDebugEnabled()) {
          LOG.debug("Grouped " + group.size()
              + " length: " + groupedSplit.getLength()
              + " split at: " + location);
        }
        groupedSplitsList.add(groupedSplit);
      }
     
      if (!doingRackLocal && numFullGroupsCreated < 1) {
        // no node could create a node-local group. go rack-local
        doingRackLocal = true;
        // re-create locations
        int numRemainingSplits = originalSplits.length - splitsProcessed;
        Set<InputSplit> remainingSplits = new HashSet<InputSplit>(numRemainingSplits);
        // gather remaining splits.
        for (Map.Entry<String, LocationHolder> entry : distinctLocations.entrySet()) {
          LocationHolder locHolder = entry.getValue();
          while (!locHolder.isEmpty()) {
            SplitHolder splitHolder = locHolder.getUnprocessedHeadSplit();
            if (splitHolder != null) {
              remainingSplits.add(splitHolder.split);
              locHolder.incrementHeadIndex();
            }
          }
        }
        if (remainingSplits.size() != numRemainingSplits) {
          throw new TezUncheckedException("Expected: " + numRemainingSplits
              + " got: " + remainingSplits.size());
        }
       
        // doing all this now instead of up front because the number of remaining
        // splits is expected to be much smaller
View Full Code Here

      try {
        OutputFormat outputFormat = ReflectionUtils.newInstance(taskContext
            .getOutputFormatClass(), jobConf);
        committer = outputFormat.getOutputCommitter(taskContext);
      } catch (Exception e) {
        throw new TezUncheckedException(e);
      }
    } else {
      committer = ReflectionUtils.newInstance(jobConf.getClass(
          "mapred.output.committer.class", FileOutputCommitter.class,
          org.apache.hadoop.mapred.OutputCommitter.class), jobConf);
View Full Code Here

TOP

Related Classes of org.apache.tez.dag.api.TezUncheckedException

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.