Source Code of org.springframework.yarn.batch.partition.SplitterPartitioner

/*
 * Copyright 2014 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.springframework.yarn.batch.partition;


import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;


import org.apache.hadoop.fs.Path;
import org.springframework.batch.core.partition.support.Partitioner;
import org.springframework.batch.item.ExecutionContext;
import org.springframework.core.io.Resource;
import org.springframework.data.hadoop.fs.HdfsResourceLoader;
import org.springframework.data.hadoop.store.split.Split;
import org.springframework.data.hadoop.store.split.Splitter;
import org.springframework.util.Assert;
import org.springframework.util.StringUtils;


/**
 * Implementation of {@link Partitioner} that locates multiple resources and
 * associates their file names with execution context keys.
 * <p>
 * Creates an {@link ExecutionContext} for every {@link Split} of every
 * resolved resource, and labels them as
 * <code>{partition0, partition1, ..., partitionN}</code> where the 'partition'
 * part comes from {@link #getPartitionBaseIdentifier()}.
 * <p>
 * The grid size information passed to method {@link Partitioner#partition(int)}
 * is ignored.
 *
 * @author Janne Valkealahti
 *
 */
public class SplitterPartitioner extends AbstractPartitioner {


  private Splitter splitter;


  private Set<String> inputPatterns;


  @Override
  protected Map<String, ExecutionContext> createPartitions() {
    Map<String, ExecutionContext> contexts = new HashMap<String, ExecutionContext>();


    try {
      int i = 0;
      for (Resource resource : resolveResources()) {
        Assert.state(resource.exists(), "Resource does not exist: " + resource);
        List<Split> inputSplits = splitter.getSplits(new Path(resource.getURI()));
        for (Split split : inputSplits) {
          contexts.put(getPartitionBaseIdentifier() + i++, createExecutionContext(resource, split));
        }
      }
    } catch (IOException e) {
      throw new IllegalArgumentException("Error partitioning splits", e);
    }


    return contexts;
  }


  /**
   * Sets the input patterns.
   *
   * @param inputPatterns the new input patterns
   */
  public void setInputPatterns(String inputPatterns) {
    setInputPatterns(StringUtils.commaDelimitedListToSet(inputPatterns));
  }


  /**
   * Sets the input patterns.
   *
   * @param inputPatterns the new input patterns
   */
  public void setInputPatterns(Set<String> inputPatterns) {
    this.inputPatterns = inputPatterns;
  }


  /**
   * Sets the splitter.
   *
   * @param splitter the new splitter
   */
  public void setSplitter(Splitter splitter) {
    this.splitter = splitter;
  }


  private Set<Resource> resolveResources() throws IOException {
    Set<Resource> resources = new HashSet<Resource>();
    HdfsResourceLoader loader = new HdfsResourceLoader(getConfiguration());
    if (inputPatterns != null) {
      for (String pattern : inputPatterns) {
        resources.addAll(Arrays.asList(loader.getResources(pattern)));
      }
    }
    loader.close();
    return resources;
  }


}
Source Code of org.springframework.yarn.batch.partition.SplitterPartitioner

Related Classes of org.springframework.yarn.batch.partition.SplitterPartitioner