Package org.apache.trevni.avro

Source Code of org.apache.trevni.avro.AvroColumnWriter

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.trevni.avro;

import java.io.IOException;
import java.io.File;
import java.io.OutputStream;
import java.util.Collection;
import java.util.Map;

import org.apache.trevni.ColumnFileMetaData;
import org.apache.trevni.ColumnFileWriter;
import org.apache.trevni.TrevniRuntimeException;

import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericFixed;
import org.apache.avro.util.Utf8;

import static org.apache.trevni.avro.AvroColumnator.isSimple;

/** Write Avro records to a Trevni column file.
*
* <p>Each primitive type is written to a separate column.
*
* <p>Output is buffered until {@link #writeTo(OutputStream)} is called.  The
* {@link #sizeEstimate()} indicates both the amount of data buffered and the
* size of the file that will be written.
*/
public class AvroColumnWriter<D> {
  private Schema schema;
  private GenericData model;
  private ColumnFileWriter writer;
  private int[] arrayWidths;

  public static final String SCHEMA_KEY = "avro.schema";

  public AvroColumnWriter(Schema s, ColumnFileMetaData meta)
    throws IOException {
    this(s, meta, GenericData.get());
  }

  public AvroColumnWriter(Schema s, ColumnFileMetaData meta, GenericData model)
    throws IOException {
    this.schema = s;
    AvroColumnator columnator = new AvroColumnator(s);
    meta.set(SCHEMA_KEY, s.toString());           // save schema in file
    this.writer = new ColumnFileWriter(meta, columnator.getColumns());
    this.arrayWidths = columnator.getArrayWidths();
    this.model = model;
  }

  /** Return the approximate size of the file that will be written.  Tries to
   * slightly over-estimate.  Indicates both the size in memory of the buffered
   * data as well as the size of the file that will be written by {@link
   * #writeTo(OutputStream)}. */
  public long sizeEstimate() { return writer.sizeEstimate(); }

  /** Write all rows added to the named output stream. */
  public void writeTo(OutputStream out) throws IOException {
    writer.writeTo(out);
  }

  /** Write all rows added to the named file. */
  public void writeTo(File file) throws IOException {
    writer.writeTo(file);
  }

  /** Add a row to the file. */
  public void write(D value) throws IOException {
    writer.startRow();
    int count = write(value, schema, 0);
    assert(count == writer.getColumnCount());
    writer.endRow();
  }
 
  private int write(Object o, Schema s, int column) throws IOException {
    if (isSimple(s)) {
      writeValue(o, s, column);
      return column+1;
    }
    switch (s.getType()) {
    case MAP:
      Map<?,?> map = (Map)o;
      writer.writeLength(map.size(), column);
      for (Map.Entry e : map.entrySet()) {
        writer.writeValue(null, column);
        writer.writeValue(e.getKey(), column+1);
        int c = write(e.getValue(), s.getValueType(), column+2);
        assert(c == column+arrayWidths[column]);
      }
      return column+arrayWidths[column];
    case RECORD:
      for (Field f : s.getFields())
        column = write(model.getField(o,f.name(),f.pos()), f.schema(), column);
      return column;
    case ARRAY:
      Collection elements = (Collection)o;
      writer.writeLength(elements.size(), column);
      if (isSimple(s.getElementType())) {         // optimize simple arrays
        for (Object element : elements)
          writeValue(element, s.getElementType(), column);
        return column+1;
      }
      for (Object element : elements) {
        writer.writeValue(null, column);
        int c = write(element, s.getElementType(), column+1);
        assert(c == column+arrayWidths[column]);
      }
      return column+arrayWidths[column];
    case UNION:
      int b = model.resolveUnion(s, o);
      int i = 0;
      for (Schema branch : s.getTypes()) {
        boolean selected = i++ == b;
        if (branch.getType() == Schema.Type.NULL) continue;
        if (!selected) {
          writer.writeLength(0, column);
          column+=arrayWidths[column];
        } else {
          writer.writeLength(1, column);
          if (isSimple(branch)) {
            writeValue(o, branch, column++);
          } else {
            writer.writeValue(null, column);
            column = write(o, branch, column+1);
          }
        }
      }
      return column;
    default:
      throw new TrevniRuntimeException("Unknown schema: "+s);
    }
  }

  private void writeValue(Object value, Schema s, int column)
    throws IOException {
   
    switch (s.getType()) {
    case STRING:
      if (value instanceof Utf8)                    // convert Utf8 to String
        value = value.toString();
      break;
    case ENUM:
      if (value instanceof Enum)
        value = ((Enum)value).ordinal();
      else
        value = s.getEnumOrdinal(value.toString());
      break;
    case FIXED:
      value = ((GenericFixed)value).bytes();
      break;
    }
    writer.writeValue(value, column);
  }

}
TOP

Related Classes of org.apache.trevni.avro.AvroColumnWriter

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.