Package org.apache.avro

Source Code of org.apache.avro.RandomData

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.avro;

import java.io.File;
import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Random;

import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericArray;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.util.Utf8;

/** Generates schema data as Java objects with random values. */
public class RandomData implements Iterable<Object> {
  private final Schema root;
  private final long seed;
  private final int count;

  public RandomData(Schema schema, int count) {
    this(schema, count, System.currentTimeMillis());
  }

  public RandomData(Schema schema, int count, long seed) {
    this.root = schema;
    this.seed = seed;
    this.count = count;
  }
 
  public Iterator<Object> iterator() {
    return new Iterator<Object>() {
      private int n;
      private Random random = new Random(seed);
      public boolean hasNext() { return n < count; }
      public Object next() {
        n++;
        return generate(root, random, 0);
      }
      public void remove() { throw new UnsupportedOperationException(); }
    };
  }
 
  @SuppressWarnings(value="unchecked")
  private static Object generate(Schema schema, Random random, int d) {
    switch (schema.getType()) {
    case RECORD:
      GenericRecord record = new GenericData.Record(schema);
      for (Schema.Field field : schema.getFields())
        record.put(field.name(), generate(field.schema(), random, d+1));
      return record;
    case ENUM:
      List<String> symbols = schema.getEnumSymbols();
      return new GenericData.EnumSymbol(symbols.get(random.nextInt(symbols.size())));
    case ARRAY:
      int length = (random.nextInt(5)+2)-d;
      GenericArray<Object> array =
        new GenericData.Array(length<=0?0:length, schema);
      for (int i = 0; i < length; i++)
        array.add(generate(schema.getElementType(), random, d+1));
      return array;
    case MAP:
      length = (random.nextInt(5)+2)-d;
      Map<Object,Object> map = new HashMap<Object,Object>(length<=0?0:length);
      for (int i = 0; i < length; i++) {
        map.put(randomUtf8(random, 40),
                generate(schema.getValueType(), random, d+1));
      }
      return map;
    case UNION:
      List<Schema> types = schema.getTypes();
      return generate(types.get(random.nextInt(types.size())), random, d);
    case FIXED:
      byte[] bytes = new byte[schema.getFixedSize()];
      random.nextBytes(bytes);
      return new GenericData.Fixed(bytes);
    case STRING:  return randomUtf8(random, 40);
    case BYTES:   return randomBytes(random, 40);
    case INT:     return random.nextInt();
    case LONG:    return random.nextLong();
    case FLOAT:   return random.nextFloat();
    case DOUBLE:  return random.nextDouble();
    case BOOLEAN: return random.nextBoolean();
    case NULL:    return null;
    default: throw new RuntimeException("Unknown type: "+schema);
    }
  }

  private static Utf8 randomUtf8(Random rand, int maxLength) {
    Utf8 utf8 = new Utf8().setLength(rand.nextInt(maxLength));
    for (int i = 0; i < utf8.getLength(); i++) {
      utf8.getBytes()[i] = (byte)('a'+rand.nextInt('z'-'a'));
    }
    return utf8;
  }

  private static ByteBuffer randomBytes(Random rand, int maxLength) {
    ByteBuffer bytes = ByteBuffer.allocate(rand.nextInt(maxLength));
    bytes.limit(bytes.capacity());
    rand.nextBytes(bytes.array());
    return bytes;
  }

  public static void main(String[] args) throws Exception {
    if(args.length != 3) {
      System.out.println("Usage: RandomData <schemafile> <outputfile> <count>");
      System.exit(-1);
    }
    Schema sch = Schema.parse(new File(args[0]));
    DataFileWriter<Object> writer =
      new DataFileWriter<Object>(new GenericDatumWriter<Object>())
      .create(sch, new File(args[1]));
    try {
      for (Object datum : new RandomData(sch, Integer.parseInt(args[2]))) {
        writer.append(datum);
      }
    } finally {
      writer.close();
    }
  }
}
TOP

Related Classes of org.apache.avro.RandomData

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.