Package org.apache.crunch.types.avro

Source Code of org.apache.crunch.types.avro.AvroKeyValueTableType$AvroKeyValueRecordToPair

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
*  Unless required by applicable law or agreed to in writing, software
*  distributed under the License is distributed on an "AS IS" BASIS,
*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*  See the License for the specific language governing permissions and
*  limitations under the License.
*/

package org.apache.crunch.types.avro;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.hadoop.io.AvroKeyValue;
import org.apache.crunch.MapFn;
import org.apache.crunch.Pair;
import org.apache.crunch.lib.PTables;
import org.apache.crunch.types.PGroupedTableType;
import org.apache.crunch.types.PTableType;
import org.apache.crunch.types.PType;
import org.apache.crunch.types.TupleDeepCopier;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskInputOutputContext;

/**
* A {@code PTableType} that is compatible with Avro key/value files that are created or read using the
* {@code org.apache.avro.mapreduce.AvroJob} class.
*/
class AvroKeyValueTableType<K, V> extends BaseAvroTableType<K, V> implements PTableType<K, V> {

  private static class PairToAvroKeyValueRecord extends MapFn<Pair, GenericRecord> {
    private final MapFn keyMapFn;
    private final MapFn valueMapFn;
    private final String keySchemaJson;
    private final String valueSchemaJson;

    private String keyValueSchemaJson;
    private transient Schema keyValueSchema;

    public PairToAvroKeyValueRecord(AvroType keyType, AvroType valueType) {
      this.keyMapFn = keyType.getOutputMapFn();
      this.keySchemaJson = keyType.getSchema().toString();
      this.valueMapFn = valueType.getOutputMapFn();
      this.valueSchemaJson = valueType.getSchema().toString();
    }

    @Override
    public void configure(Configuration conf) {
      keyMapFn.configure(conf);
      valueMapFn.configure(conf);
    }

    @Override
    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
      keyMapFn.setContext(context);
      valueMapFn.setContext(context);
    }

    @Override
    public void initialize() {
      keyMapFn.initialize();
      valueMapFn.initialize();
      Schema.Parser parser = new Schema.Parser();
      keyValueSchemaJson = AvroKeyValue.getSchema(parser.parse(keySchemaJson), parser.parse(valueSchemaJson)).toString();
    }

    @Override
    public GenericRecord map(Pair input) {
      if (keyValueSchema == null) {
        keyValueSchema = new Schema.Parser().parse(keyValueSchemaJson);
      }
      GenericRecord keyValueRecord = new GenericData.Record(keyValueSchema);
      keyValueRecord.put(AvroKeyValue.KEY_FIELD, keyMapFn.map(input.first()));
      keyValueRecord.put(AvroKeyValue.VALUE_FIELD, valueMapFn.map(input.second()));
      return keyValueRecord;
    }
  }

  private static class AvroKeyValueRecordToPair extends MapFn<GenericRecord, Pair> {

    private final MapFn firstMapFn;
    private final MapFn secondMapFn;

    public AvroKeyValueRecordToPair(MapFn firstMapFn, MapFn secondMapFn) {
      this.firstMapFn = firstMapFn;
      this.secondMapFn = secondMapFn;
    }

    @Override
    public void configure(Configuration conf) {
      firstMapFn.configure(conf);
      secondMapFn.configure(conf);
    }

    @Override
    public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
      firstMapFn.setContext(context);
      secondMapFn.setContext(context);
    }

    @Override
    public void initialize() {
      firstMapFn.initialize();
      secondMapFn.initialize();
    }

    @Override
    public Pair map(GenericRecord input) {
      return Pair.of(
          firstMapFn.map(input.get(AvroKeyValue.KEY_FIELD)),
          secondMapFn.map(input.get(AvroKeyValue.VALUE_FIELD)));
    }
  }

  private final AvroType<K> keyType;
  private final AvroType<V> valueType;

  public AvroKeyValueTableType(AvroType<K> keyType, AvroType<V> valueType, Class<Pair<K, V>> pairClass) {
    super(pairClass, AvroKeyValue.getSchema(keyType.getSchema(), valueType.getSchema()),
        new AvroKeyValueRecordToPair(keyType.getInputMapFn(), valueType.getInputMapFn()),
        new PairToAvroKeyValueRecord(keyType, valueType),
        new TupleDeepCopier(Pair.class, keyType, valueType),
        null, keyType, valueType);
    this.keyType = keyType;
    this.valueType = valueType;
  }

  @Override
  public PType<K> getKeyType() {
    return keyType;
  }

  @Override
  public PType<V> getValueType() {
    return valueType;
  }

  @Override
  public PGroupedTableType<K, V> getGroupedTableType() {
    return new AvroGroupedTableType<K, V>(this);
  }

  @Override
  public Pair<K, V> getDetachedValue(Pair<K, V> value) {
    return PTables.getDetachedValue(this, value);
  }
}
TOP

Related Classes of org.apache.crunch.types.avro.AvroKeyValueTableType$AvroKeyValueRecordToPair

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.