Package com.datasalt.pangool.io.Schema

Examples of com.datasalt.pangool.io.Schema.Field


  /**
   * Creates an instance, handing the command-line usage message
   * (expected arguments: input path and output path) to the superclass constructor.
   */
  public AvroTopicalWordCount() {
    super("Usage: AvroTopicalWordCount [input_path] [output_path]");
  }

  static Schema getSchema() {
    Field avroField = Fields.createAvroField("my_avro", getAvroSchema(), false);
    return new Schema("schema", Arrays.asList(avroField));
  }
View Full Code Here


    if(inputFormat == null) {
      throw new IllegalArgumentException("Input format can't be null");
    }
    List<Field> fields = new ArrayList<Field>();
    for(String partitionField: partitionFields) {
      Field field = schema.getField(partitionField);
      if(field == null) {
        throw new IllegalArgumentException("Partition field not contained in input schema: " + partitionField);
      }
      fields.add(field);
    }
View Full Code Here

      if(partitionByFields != null) {
        partitionBySchemaFields = new Field[partitionByFields.length];
        int i = 0;
        for(String partitionByField : partitionByFields) {
          partitionByField = partitionByField.trim();
          Field partitionField = schema.getField(partitionByField);
          if(partitionField == null) {
            throw new TableBuilderException("Invalid partition field: " + partitionByField
                + " not present in its Schema: " + schema + ".");
          }
          partitionBySchemaFields[i] = partitionField;
          i++;
        }
      }
    } else {
      if(partitionByFields != null) {
        throw new TableBuilderException(
            "Replicated table with partition fields is an inconsistent specification. Please check if you are doing something wrong.");
      }
    }

    // Indexes
    List<FieldIndex> indexes = new ArrayList<FieldIndex>();
    for(String fieldToIndex : fieldsToIndex) {
      fieldToIndex = fieldToIndex.trim();
      // Check that field exists in schema
      Field field1 = schema.getField(fieldToIndex);
      if(field1 == null) {
        throw new TableBuilderException("Invalid field to index: " + fieldToIndex
            + " not present in specified Schema: " + schema + ".");
      }
      indexes.add(new FieldIndex(field1));
    }
    // Also, support for compound indexes
    for(List<String> compoundIndex : compoundIndexes) {
      List<Field> compoundIndexFields = new ArrayList<Field>();
      for(String field : compoundIndex) {
        field = field.trim();
        // Check that each field exists in schema
        Field field2 = schema.getField(field);
        if(field2 == null) {
          throw new TableBuilderException("Invalid compound index: " + compoundIndex + ", field "
              + field + " not present in specified Schema: " + schema + ".");
        }
        compoundIndexFields.add(field2);
View Full Code Here

    TupleMRConfigBuilder b = new TupleMRConfigBuilder();
    List<Field> fields = new ArrayList<Field>();
    fields.add(Field.createObject("my_object", A.class));
    b.addIntermediateSchema(new Schema("schema1", fields));
    fields.clear();
    Field avroField = Field.createObject("my_object", Object.class);
    avroField.setObjectSerialization(AvroFieldSerialization.class);
    fields.add(avroField);
    b.addIntermediateSchema(new Schema("schema2", fields));
    b.setGroupByFields("my_object");
    b.buildConf();
  }
View Full Code Here

    bis.close();
  }
 
  @Test
  public void testDefaultValuesPerformance() throws IOException {
    Field cField = Field.create("c", Type.DOUBLE, true, 100d);
    Field dField = Field.create("d", Type.LONG, true, 1000l);
   
    Schema schema = new Schema("schema", Fields.parse("a:string, b:int?"));
    Schema targetSchema = Mutator.superSetOf(schema, cField, dField);
   
    Configuration conf = new Configuration();
View Full Code Here

   */
  protected static OrderBy createRandomSortCriteria(Schema schema, int numFields)
      throws TupleMRException {
    List<SortElement> builder = new ArrayList<SortElement>();
    for (int i = 0; i < numFields; i++) {
      Field field = schema.getField(i);
      if (field.getType() == Type.OBJECT && field.getName().equals("my_avro") && random.nextBoolean()) {
        // With custom comparator
        builder.add(new SortElement(field.getName(), random.nextBoolean() ? Order.ASC
            : Order.DESC, random.nextBoolean() ? Criteria.NullOrder.NULL_SMALLEST
            : Criteria.NullOrder.NULL_BIGGEST, new DummyComparator()));
      } else {
        // Without custom comparator
        builder.add(new SortElement(field.getName(), random.nextBoolean() ? Order.ASC
            : Order.DESC, random.nextBoolean() ? Criteria.NullOrder.NULL_SMALLEST
            : Criteria.NullOrder.NULL_BIGGEST));
      }
    }
    return new OrderBy(builder);
View Full Code Here

   */

  protected static ITuple fillTuple(boolean isRandom, ITuple tuple, int minIndex, int maxIndex) {
    try {
      for (int i = minIndex; i <= maxIndex; i++) {
        Field field = tuple.getSchema().getField(i);
        if (field.isNullable() && random.nextDouble() < 0.2d) {
          tuple.set(i, null);
          continue;
        }
        switch (field.getType()) {
          case INT:
            tuple.set(i, isRandom ? random.nextInt() : 0);
            break;
          case LONG:
            tuple.set(i, isRandom ? random.nextLong() : 0);
            break;
          case BOOLEAN:
            tuple.set(i, isRandom ? random.nextBoolean() : false);
            break;
          case DOUBLE:
            tuple.set(i, isRandom ? random.nextDouble() : 0.0);
            break;
          case FLOAT:
            tuple.set(i, isRandom ? random.nextFloat() : 0f);
            break;
          case STRING:
            fillString(isRandom, tuple, i);
            break;
          case ENUM:
            fillEnum(isRandom, field, tuple, i);
            break;
          case OBJECT:
            fillObject(isRandom, tuple, field, i);
            break;
          case BYTES:
            fillBytes(isRandom, tuple, i);
            break;
          default:
            throw new IllegalArgumentException("Not supported type " + field.getType());
        }
      }
      return tuple;
    } catch (Exception e) {
      throw new RuntimeException(e);
View Full Code Here

        throw new IOException("Read line [" + Arrays.toString(readLine) + "] with [" + readLine.length
            + "] fields which is less than specified Schema [" + schema + "] fields [" + schema.getFields().size()
            + "]");
      }
      for(int i = 0; i < schema.getFields().size(); i++) {
        Field field = schema.getFields().get(i);
        try {
          switch(field.getType()) {
          case DOUBLE:
            tuple.set(i, Double.parseDouble(readLine[i]));
            break;
          case FLOAT:
            tuple.set(i, Float.parseFloat(readLine[i]));
            break;
          case ENUM:
            Class clazz = field.getObjectClass();
            tuple.set(i, Enum.valueOf(clazz, readLine[i]));
            break;
          case INT:
            tuple.set(i, Integer.parseInt(readLine[i]));
            break;
View Full Code Here

            }
            String currentValue = "";
            try {
              currentValue = readLine.get(index);
              if(currentValue != null) {
                Field field = schema.getFields().get(i);
                switch(field.getType()) {
                case INT:
                case LONG:
                case FLOAT:
                case DOUBLE:
                  processNumber(field.getType(), tuple, i, currentValue);
                  break;
                case ENUM:
                  Class clazz = field.getObjectClass();
                  tuple.set(i, Enum.valueOf(clazz, currentValue.trim()));
                  break;
                case STRING:
                  if (type == InputType.CSV) {
                    tuple.set(i, currentValue);
View Full Code Here

  public void readFields(ITuple tuple, Deserializer[] customDeserializers) throws IOException {
    Schema schema = tuple.getSchema();
    for(int index = 0; index < schema.getFields().size(); index++) {
      Deserializer customDeser = customDeserializers[index];
      Field field = schema.getField(index);
      switch(field.getType()) {
      case INT:
        tuple.set(index, WritableUtils.readVInt(input));
        break;
      case LONG:
        tuple.set(index, WritableUtils.readVLong(input));
        break;
      case DOUBLE:
        tuple.set(index, input.readDouble());
        break;
      case FLOAT:
        tuple.set(index, input.readFloat());
        break;
      case STRING:
        readUtf8(input, tuple, index);
        break;
      case BOOLEAN:
        byte b = input.readByte();
        tuple.set(index, (b != 0));
        break;
      case ENUM:
        readEnum(input, tuple, field.getObjectClass(), index);
        break;
      case BYTES:
        readBytes(input, tuple, index);
        break;
      case OBJECT:
        readCustomObject(input, tuple, field.getObjectClass(), index, customDeser);
        break;
      default:
        throw new IOException("Not supported type:" + field.getType());
      }
    }
  }
View Full Code Here

TOP

Related Classes of com.datasalt.pangool.io.Schema.Field

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.