Package org.apache.crunch.types.writable

Source Code of org.apache.crunch.types.writable.Writables$TupleTWMapFn

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.crunch.types.writable;

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.List;
import java.util.Map;

import org.apache.crunch.CrunchRuntimeException;
import org.apache.crunch.MapFn;
import org.apache.crunch.Pair;
import org.apache.crunch.Tuple;
import org.apache.crunch.Tuple3;
import org.apache.crunch.Tuple4;
import org.apache.crunch.TupleN;
import org.apache.crunch.fn.CompositeMapFn;
import org.apache.crunch.fn.IdentityFn;
import org.apache.crunch.types.PType;
import org.apache.crunch.types.PTypes;
import org.apache.crunch.types.TupleFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableFactories;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.mapreduce.TaskInputOutputContext;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

/**
* Defines static methods that are analogous to the methods defined in
* {@link WritableTypeFamily} for convenient static importing.
*
*/
public class Writables {
  private static final MapFn<NullWritable, Void> NULL_WRITABLE_TO_VOID = new MapFn<NullWritable, Void>() {
    @Override
    public Void map(NullWritable input) {
      return null;
    }
  };

  private static final MapFn<Void, NullWritable> VOID_TO_NULL_WRITABLE = new MapFn<Void, NullWritable>() {
    @Override
    public NullWritable map(Void input) {
      return NullWritable.get();
    }
  };

  private static final MapFn<Text, String> TEXT_TO_STRING = new MapFn<Text, String>() {
    @Override
    public String map(Text input) {
      return input.toString();
    }
  };

  private static final MapFn<String, Text> STRING_TO_TEXT = new MapFn<String, Text>() {
    @Override
    public Text map(String input) {
      return new Text(input);
    }
  };

  private static final MapFn<IntWritable, Integer> IW_TO_INT = new MapFn<IntWritable, Integer>() {
    @Override
    public Integer map(IntWritable input) {
      return input.get();
    }
  };

  private static final MapFn<Integer, IntWritable> INT_TO_IW = new MapFn<Integer, IntWritable>() {
    @Override
    public IntWritable map(Integer input) {
      return new IntWritable(input);
    }
  };

  private static final MapFn<LongWritable, Long> LW_TO_LONG = new MapFn<LongWritable, Long>() {
    @Override
    public Long map(LongWritable input) {
      return input.get();
    }
  };

  private static final MapFn<Long, LongWritable> LONG_TO_LW = new MapFn<Long, LongWritable>() {
    @Override
    public LongWritable map(Long input) {
      return new LongWritable(input);
    }
  };

  private static final MapFn<FloatWritable, Float> FW_TO_FLOAT = new MapFn<FloatWritable, Float>() {
    @Override
    public Float map(FloatWritable input) {
      return input.get();
    }
  };

  private static final MapFn<Float, FloatWritable> FLOAT_TO_FW = new MapFn<Float, FloatWritable>() {
    @Override
    public FloatWritable map(Float input) {
      return new FloatWritable(input);
    }
  };

  private static final MapFn<DoubleWritable, Double> DW_TO_DOUBLE = new MapFn<DoubleWritable, Double>() {
    @Override
    public Double map(DoubleWritable input) {
      return input.get();
    }
  };

  private static final MapFn<Double, DoubleWritable> DOUBLE_TO_DW = new MapFn<Double, DoubleWritable>() {
    @Override
    public DoubleWritable map(Double input) {
      return new DoubleWritable(input);
    }
  };

  private static final MapFn<BooleanWritable, Boolean> BW_TO_BOOLEAN = new MapFn<BooleanWritable, Boolean>() {
    @Override
    public Boolean map(BooleanWritable input) {
      return input.get();
    }
  };

  private static final BooleanWritable TRUE = new BooleanWritable(true);
  private static final BooleanWritable FALSE = new BooleanWritable(false);
  private static final MapFn<Boolean, BooleanWritable> BOOLEAN_TO_BW = new MapFn<Boolean, BooleanWritable>() {
    @Override
    public BooleanWritable map(Boolean input) {
      return input == Boolean.TRUE ? TRUE : FALSE;
    }
  };

  private static final MapFn<BytesWritable, ByteBuffer> BW_TO_BB = new MapFn<BytesWritable, ByteBuffer>() {
    @Override
    public ByteBuffer map(BytesWritable input) {
      return ByteBuffer.wrap(input.getBytes(), 0, input.getLength());
    }
  };

  private static final MapFn<ByteBuffer, BytesWritable> BB_TO_BW = new MapFn<ByteBuffer, BytesWritable>() {
    @Override
    public BytesWritable map(ByteBuffer input) {
      BytesWritable bw = new BytesWritable();
      bw.set(input.array(), input.arrayOffset(), input.limit());
      return bw;
    }
  };

  private static <S, W extends Writable> WritableType<S, W> create(Class<S> typeClass, Class<W> writableClass,
      MapFn<W, S> inputDoFn, MapFn<S, W> outputDoFn) {
    return new WritableType<S, W>(typeClass, writableClass, inputDoFn, outputDoFn);
  }

  private static final WritableType<Void, NullWritable> nulls = create(Void.class, NullWritable.class,
      NULL_WRITABLE_TO_VOID, VOID_TO_NULL_WRITABLE);
  private static final WritableType<String, Text> strings = create(String.class, Text.class, TEXT_TO_STRING,
      STRING_TO_TEXT);
  private static final WritableType<Long, LongWritable> longs = create(Long.class, LongWritable.class, LW_TO_LONG,
      LONG_TO_LW);
  private static final WritableType<Integer, IntWritable> ints = create(Integer.class, IntWritable.class, IW_TO_INT,
      INT_TO_IW);
  private static final WritableType<Float, FloatWritable> floats = create(Float.class, FloatWritable.class,
      FW_TO_FLOAT, FLOAT_TO_FW);
  private static final WritableType<Double, DoubleWritable> doubles = create(Double.class, DoubleWritable.class,
      DW_TO_DOUBLE, DOUBLE_TO_DW);
  private static final WritableType<Boolean, BooleanWritable> booleans = create(Boolean.class, BooleanWritable.class,
      BW_TO_BOOLEAN, BOOLEAN_TO_BW);
  private static final WritableType<ByteBuffer, BytesWritable> bytes = create(ByteBuffer.class, BytesWritable.class,
      BW_TO_BB, BB_TO_BW);

  private static final Map<Class<?>, PType<?>> PRIMITIVES = ImmutableMap.<Class<?>, PType<?>> builder()
      .put(String.class, strings).put(Long.class, longs).put(Integer.class, ints).put(Float.class, floats)
      .put(Double.class, doubles).put(Boolean.class, booleans).put(ByteBuffer.class, bytes).build();

  private static final Map<Class<?>, WritableType<?, ?>> EXTENSIONS = Maps.newHashMap();

  public static <T> PType<T> getPrimitiveType(Class<T> clazz) {
    return (PType<T>) PRIMITIVES.get(clazz);
  }

  public static <T> void register(Class<T> clazz, WritableType<T, ? extends Writable> ptype) {
    EXTENSIONS.put(clazz, ptype);
  }

  public static final WritableType<Void, NullWritable> nulls() {
    return nulls;
  }

  public static final WritableType<String, Text> strings() {
    return strings;
  }

  public static final WritableType<Long, LongWritable> longs() {
    return longs;
  }

  public static final WritableType<Integer, IntWritable> ints() {
    return ints;
  }

  public static final WritableType<Float, FloatWritable> floats() {
    return floats;
  }

  public static final WritableType<Double, DoubleWritable> doubles() {
    return doubles;
  }

  public static final WritableType<Boolean, BooleanWritable> booleans() {
    return booleans;
  }

  public static final WritableType<ByteBuffer, BytesWritable> bytes() {
    return bytes;
  }

  public static final <T, W extends Writable> WritableType<T, W> records(Class<T> clazz) {
    if (EXTENSIONS.containsKey(clazz)) {
      return (WritableType<T, W>) EXTENSIONS.get(clazz);
    }
    if (Writable.class.isAssignableFrom(clazz)) {
      return (WritableType<T, W>) writables(clazz.asSubclass(Writable.class));
    } else {
      throw new IllegalArgumentException(
          "Cannot create Writable records from non-Writable class"+ clazz.getCanonicalName());
    }
  }

  public static <W extends Writable> WritableType<W, W> writables(Class<W> clazz) {
    MapFn wIdentity = IdentityFn.getInstance();
    return new WritableType<W, W>(clazz, clazz, wIdentity, wIdentity);
  }

  public static <K, V> WritableTableType<K, V> tableOf(PType<K> key, PType<V> value) {
    if (key instanceof WritableTableType) {
      WritableTableType wtt = (WritableTableType) key;
      key = pairs(wtt.getKeyType(), wtt.getValueType());
    } else if (!(key instanceof WritableType)) {
      throw new IllegalArgumentException("Key type must be of class WritableType");
    }
    if (value instanceof WritableTableType) {
      WritableTableType wtt = (WritableTableType) value;
      value = pairs(wtt.getKeyType(), wtt.getValueType());
    } else if (!(value instanceof WritableType)) {
      throw new IllegalArgumentException("Value type must be of class WritableType");
    }
    return new WritableTableType((WritableType) key, (WritableType) value);
  }

  private static <W extends Writable> W create(Class<W> clazz, BytesWritable bytes) {
    W instance = (W) WritableFactories.newInstance(clazz);
    try {
      instance.readFields(new DataInputStream(new ByteArrayInputStream(bytes.getBytes())));
    } catch (IOException e) {
      throw new CrunchRuntimeException(e);
    }
    return instance;
  }
 
  /**
   * For mapping from {@link TupleWritable} instances to {@link Tuple}s.
   *
   */
  private static class TWTupleMapFn extends MapFn<TupleWritable, Tuple> {
    private final TupleFactory<?> tupleFactory;
    private final List<MapFn> fns;
    private final List<Class<Writable>> writableClasses;
   
    private transient Object[] values;

    public TWTupleMapFn(TupleFactory<?> tupleFactory, WritableType<?, ?>... ptypes) {
      this.tupleFactory = tupleFactory;
      this.fns = Lists.newArrayList();
      this.writableClasses = Lists.newArrayList();
      for (WritableType ptype : ptypes) {
        fns.add(ptype.getInputMapFn());
        writableClasses.add(ptype.getSerializationClass());
      }
    }

    @Override
    public void configure(Configuration conf) {
      for (MapFn fn : fns) {
        fn.configure(conf);
      }
    }

    @Override
    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
      for (MapFn fn : fns) {
        fn.setContext(context);
      }
    }
   
    @Override
    public void initialize() {
      for (MapFn fn : fns) {
        fn.initialize();
      }
      // The rest of the methods allocate new
      // objects each time. However this one
      // uses Tuple.tuplify which does a copy
      this.values = new Object[fns.size()];
      tupleFactory.initialize();
    }

    @Override
    public Tuple map(TupleWritable in) {
      for (int i = 0; i < values.length; i++) {
        if (in.has(i)) {
          Writable w = create(writableClasses.get(i), in.get(i));
          values[i] = fns.get(i).map(w);
        } else {
          values[i] = null;
        }
      }
      return tupleFactory.makeTuple(values);
    }
  }

  /**
   * For mapping from {@code Tuple}s to {@code TupleWritable}s.
   *
   */
  private static class TupleTWMapFn extends MapFn<Tuple, TupleWritable> {

    private transient TupleWritable writable;
    private transient BytesWritable[] values;

    private final List<MapFn> fns;
    private final List<Class<Writable>> writableClasses;
   
    public TupleTWMapFn(PType<?>... ptypes) {
      this.fns = Lists.newArrayList();
      this.writableClasses = Lists.newArrayList();
      for (PType<?> ptype : ptypes) {
        fns.add(ptype.getOutputMapFn());
        writableClasses.add(((WritableType) ptype).getSerializationClass());
      }
    }

    @Override
    public void configure(Configuration conf) {
      for (MapFn fn : fns) {
        fn.configure(conf);
      }
    }

    @Override
    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
      for (MapFn fn : fns) {
        fn.setContext(context);
      }
    }
   
    @Override
    public void initialize() {
      this.values = new BytesWritable[fns.size()];
      this.writable = new TupleWritable(values);
      this.writable.setWritableClasses(writableClasses);
      for (MapFn fn : fns) {
        fn.initialize();
      }
    }

    @Override
    public TupleWritable map(Tuple input) {
      writable.clearWritten();
      for (int i = 0; i < input.size(); i++) {
        Object value = input.get(i);
        if (value != null) {
          writable.setWritten(i);
          Writable w = (Writable) fns.get(i).map(value);
          values[i] = new BytesWritable(WritableUtils.toByteArray(w));
        }
      }
      return writable;
    }
  }

  public static <V1, V2> WritableType<Pair<V1, V2>, TupleWritable> pairs(PType<V1> p1, PType<V2> p2) {
    TWTupleMapFn input = new TWTupleMapFn(TupleFactory.PAIR, (WritableType) p1, (WritableType) p2);
    TupleTWMapFn output = new TupleTWMapFn(p1, p2);
    return new WritableType(Pair.class, TupleWritable.class, input, output, p1, p2);
  }

  public static <V1, V2, V3> WritableType<Tuple3<V1, V2, V3>, TupleWritable> triples(PType<V1> p1, PType<V2> p2,
      PType<V3> p3) {
    TWTupleMapFn input = new TWTupleMapFn(TupleFactory.TUPLE3, (WritableType) p1,
        (WritableType) p2, (WritableType) p3);
    TupleTWMapFn output = new TupleTWMapFn(p1, p2, p3);
    return new WritableType(Tuple3.class, TupleWritable.class, input, output, p1, p2, p3);
  }

  public static <V1, V2, V3, V4> WritableType<Tuple4<V1, V2, V3, V4>, TupleWritable> quads(PType<V1> p1, PType<V2> p2,
      PType<V3> p3, PType<V4> p4) {
    TWTupleMapFn input = new TWTupleMapFn(TupleFactory.TUPLE4, (WritableType) p1,
        (WritableType) p2, (WritableType) p3, (WritableType) p4);
    TupleTWMapFn output = new TupleTWMapFn(p1, p2, p3, p4);
    return new WritableType(Tuple4.class, TupleWritable.class, input, output, p1, p2, p3, p4);
  }

  public static WritableType<TupleN, TupleWritable> tuples(PType... ptypes) {
    WritableType[] wt = new WritableType[ptypes.length];
    for (int i = 0; i < wt.length; i++) {
      wt[i] = (WritableType) ptypes[i];
    }
    TWTupleMapFn input = new TWTupleMapFn(TupleFactory.TUPLEN, wt);
    TupleTWMapFn output = new TupleTWMapFn(ptypes);
    return new WritableType(TupleN.class, TupleWritable.class, input, output, ptypes);
  }

  public static <T extends Tuple> PType<T> tuples(Class<T> clazz, PType... ptypes) {
    Class[] typeArgs = new Class[ptypes.length];
    WritableType[] wt = new WritableType[ptypes.length];
    for (int i = 0; i < typeArgs.length; i++) {
      typeArgs[i] = ptypes[i].getTypeClass();
      wt[i] = (WritableType) ptypes[i];
    }
    TupleFactory<T> factory = TupleFactory.create(clazz, typeArgs);
    TWTupleMapFn input = new TWTupleMapFn(factory, wt);
    TupleTWMapFn output = new TupleTWMapFn(ptypes);
    return new WritableType(clazz, TupleWritable.class, input, output, ptypes);
  }

  public static <S, T> PType<T> derived(Class<T> clazz, MapFn<S, T> inputFn, MapFn<T, S> outputFn, PType<S> base) {
    WritableType<S, ?> wt = (WritableType<S, ?>) base;
    MapFn input = new CompositeMapFn(wt.getInputMapFn(), inputFn);
    MapFn output = new CompositeMapFn(outputFn, wt.getOutputMapFn());
    return new WritableType(clazz, wt.getSerializationClass(), input, output, base.getSubTypes().toArray(new PType[0]));
  }

  private static class ArrayCollectionMapFn<T> extends MapFn<GenericArrayWritable, Collection<T>> {
    private Class<Writable> clazz;
    private final MapFn<Object, T> mapFn;
   
    public ArrayCollectionMapFn(Class<Writable> clazz, MapFn<Object, T> mapFn) {
      this.clazz = clazz;
      this.mapFn = mapFn;
    }

    @Override
    public void configure(Configuration conf) {
      mapFn.configure(conf);
    }

    @Override
    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
      mapFn.setContext(context);
    }
   
    @Override
    public void initialize() {
      mapFn.initialize();
    }

    @Override
    public Collection<T> map(GenericArrayWritable input) {
      Collection<T> collection = Lists.newArrayList();
      for (BytesWritable raw : input.get()) {
        Writable w = create(clazz, raw);
        collection.add(mapFn.map(w));
      }
      return collection;
    }
  }

  private static class CollectionArrayMapFn<T> extends MapFn<Collection<T>, GenericArrayWritable> {

    private final MapFn<T, Object> mapFn;

    public CollectionArrayMapFn(MapFn<T, Object> mapFn) {
      this.mapFn = mapFn;
    }

    @Override
    public void configure(Configuration conf) {
      mapFn.configure(conf);
    }

    @Override
    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
      mapFn.setContext(context);
    }
   
    @Override
    public void initialize() {
      mapFn.initialize();
    }

    @Override
    public GenericArrayWritable map(Collection<T> input) {
      GenericArrayWritable arrayWritable = new GenericArrayWritable();
      BytesWritable[] w = new BytesWritable[input.size()];
      int index = 0;
      for (T in : input) {
        Writable v = (Writable) mapFn.map(in);
        w[index++] = new BytesWritable(WritableUtils.toByteArray(v));
      }
      arrayWritable.set(w);
      return arrayWritable;
    }
  }

  public static <T> WritableType<Collection<T>, GenericArrayWritable> collections(PType<T> ptype) {
    WritableType<T, ?> wt = (WritableType<T, ?>) ptype;
    return new WritableType(Collection.class, GenericArrayWritable.class,
        new ArrayCollectionMapFn(wt.getSerializationClass(), wt.getInputMapFn()),
        new CollectionArrayMapFn(wt.getOutputMapFn()), ptype);
  }

  private static class MapInputMapFn<T> extends MapFn<TextMapWritable, Map<String, T>> {
    private final Class<Writable> clazz;
    private final MapFn<Writable, T> mapFn;

    public MapInputMapFn(Class<Writable> clazz, MapFn<Writable, T> mapFn) {
      this.clazz = clazz;
      this.mapFn = mapFn;
    }

    @Override
    public void configure(Configuration conf) {
      mapFn.configure(conf);
    }

    @Override
    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
      mapFn.setContext(context);
    }
   
    @Override
    public void initialize() {
      mapFn.initialize();
    }

    @Override
    public Map<String, T> map(TextMapWritable input) {
      Map<String, T> out = Maps.newHashMap();
      for (Map.Entry<Text, BytesWritable> e : input.entrySet()) {
        Writable v = create(clazz, e.getValue());
        out.put(e.getKey().toString(), mapFn.map(v));
      }
      return out;
    }
  }

  private static class MapOutputMapFn<T> extends MapFn<Map<String, T>, TextMapWritable> {

    private final MapFn<T, Writable> mapFn;

    public MapOutputMapFn(MapFn<T, Writable> mapFn) {
      this.mapFn = mapFn;
    }

    @Override
    public void configure(Configuration conf) {
      mapFn.configure(conf);
    }

    @Override
    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
      mapFn.setContext(context);
    }
   
    @Override
    public void initialize() {
      mapFn.initialize();
    }

    @Override
    public TextMapWritable map(Map<String, T> input) {
      TextMapWritable tmw = new TextMapWritable();
      for (Map.Entry<String, T> e : input.entrySet()) {
        Writable w = mapFn.map(e.getValue());
        tmw.put(new Text(e.getKey()), new BytesWritable(WritableUtils.toByteArray(w)));
      }
      return tmw;
    }
  }

  public static <T> WritableType<Map<String, T>, MapWritable> maps(PType<T> ptype) {
    WritableType<T, ?> wt = (WritableType<T, ?>) ptype;
    return new WritableType(Map.class, TextMapWritable.class,
        new MapInputMapFn(wt.getSerializationClass(), wt.getInputMapFn()),
        new MapOutputMapFn(wt.getOutputMapFn()), ptype);
  }

  public static <T> PType<T> jsons(Class<T> clazz) {
    return PTypes.jsonString(clazz, WritableTypeFamily.getInstance());
  }

  // Not instantiable
  private Writables() {
  }
}
TOP

Related Classes of org.apache.crunch.types.writable.Writables$TupleTWMapFn

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.