Examples of com.datasalt.pangool.io.Schema.Field

com.datasalt.pangool.io.Schema.Field

  }


  public static Deserializer[] getDeserializers(Schema readSchema, Schema targetSchema, Configuration conf) {
    Deserializer[] result = new Deserializer[readSchema.getFields().size()];
    for (int i = 0; i < result.length; i++) {
      Field field = readSchema.getField(i);
      if (field.getObjectSerialization() == null) {
        continue;
      }
      Serialization serialization = ReflectionUtils.newInstance(field.getObjectSerialization(), conf);
      if (serialization instanceof FieldConfigurable) {
        Map<String, String> targetSchemaMetadata = null;
        // Check whether this field is also in the target Schema, so that we can pass the metadata of both fields
        if(targetSchema.containsField(field.getName())) {
          Field targetSchemaField = targetSchema.getField(field.getName());
          if(targetSchemaField.getObjectSerialization() == null ||
            !targetSchemaField.getObjectSerialization().equals(field.getObjectSerialization())) {
            // Error: field in target schema with same name but different serialization mechanism!
            throw new RuntimeException("Target schema has field [" + field.getName() + "] with different serialization than read schema field with same name.");
          }
          targetSchemaMetadata = targetSchemaField.getProps();
        }
        ((FieldConfigurable) serialization).setFieldProperties(field.getProps(), targetSchemaMetadata);
      }
      result[i] = serialization.getDeserializer(field.getObjectClass());
    }

View Full Code Here

    Schema intermediateSchema = mrConfig.getIntermediateSchemas().get(0);
    Criteria commonSortCriteria = mrConfig.getCommonCriteria();
    List<Field> commonFields = new ArrayList<Field>();
    for (SortElement sortElement : commonSortCriteria.getElements()) {
      String fieldName = sortElement.getName();
      Field field = checkFieldInAllSchemas(fieldName);
      commonFields.add(Field.cloneField(field, fieldName));
    }


    // adding the rest
    for (Field field : intermediateSchema.getFields()) {
      Map<String, String> aliases = mrConfig.getFieldAliases(intermediateSchema.getName());
      if (!containsField(field.getName(), commonFields, aliases)) {
        commonFields.add(field);
      }
    }
    this.commonSchema = new Schema("common", commonFields);
  }

View Full Code Here

  private void calculateMultipleSourcesSubSchemas() throws TupleMRException {
    Criteria commonSortCriteria = mrConfig.getCommonCriteria();
    List<Field> commonFields = new ArrayList<Field>();
    for (SortElement sortElement : commonSortCriteria.getElements()) {
      String fieldName = sortElement.getName();
      Field field = checkFieldInAllSchemas(fieldName);


      commonFields.add(Field.cloneField(field, fieldName));
    }


    this.commonSchema = new Schema("common", commonFields);
    this.specificSchemas = new ArrayList<Schema>();
    List<List<Field>> specificFieldsBySource = new ArrayList<List<Field>>();


    for (int schemaId = 0; schemaId < mrConfig.getNumIntermediateSchemas(); schemaId++) {
      Criteria specificCriteria = mrConfig.getSpecificOrderBys().get(schemaId);
      List<Field> specificFields = new ArrayList<Field>();
      if (specificCriteria != null) {
        for (SortElement sortElement : specificCriteria.getElements()) {
          String fieldName = sortElement.getName();
          Field field = checkFieldInSchema(fieldName, schemaId);
          specificFields.add(Field.cloneField(field, fieldName));
        }
      }
      specificFieldsBySource.add(specificFields);
    }


    for (int i = 0; i < mrConfig.getNumIntermediateSchemas(); i++) {
      Schema sourceSchema = mrConfig.getIntermediateSchema(i);
      List<Field> specificFields = specificFieldsBySource.get(i);
      for (Field field : sourceSchema.getFields()) {
        Map<String, String> sourceAliases = mrConfig.getFieldAliases(sourceSchema.getName());
        if (!containsField(field.getName(), commonSchema.getFields(), sourceAliases)
            && !containsField(field.getName(), specificFields, sourceAliases)) {
          specificFields.add(field);
        }
      }
      this.specificSchemas.add(new Schema("specific", specificFields));
    }

View Full Code Here

   * select a representative field that will be used for serializing. In the case of
   * having a mixture of fields, some of them nullable and others non-nullable,
   * a nullable Field will be returned.
   */
  private Field checkFieldInAllSchemas(String name) throws TupleMRException {
    Field field = null;
    for (int i = 0; i < mrConfig.getIntermediateSchemas().size(); i++) {
      Field fieldInSource = checkFieldInSchema(name, i);
      if (field == null) {
        field = fieldInSource;
      } else if (field.getType() != fieldInSource.getType() || field.getObjectClass() != fieldInSource.getObjectClass()) {
        throw new TupleMRException("The type for field '" + name
            + "' is not the same in all the sources");
      } else if (fieldInSource.isNullable()) {
        // IMPORTANT CASE. When nullable and non-nullable fields are mixed, a nullable field must be returned
        field = fieldInSource;
      }
    }
    return field;

View Full Code Here

  }


  private Field checkFieldInSchema(String fieldName, int schemaId)
      throws TupleMRException {
    Schema schema = mrConfig.getIntermediateSchema(schemaId);
    Field field = getFieldUsingAliases(schema, fieldName);
    if (field == null) {
      throw new TupleMRException("Field '" + fieldName + "' not present in source '"
          + schema.getName() + "' " + schema);
    }
    return field;

View Full Code Here

    }
    schemas.add(schema);
  }


  private boolean fieldSameTypeInAllSources(String fieldName) {
    Field field = null;
    for (Schema source : schemas) {
      Field currentField = source.getField(fieldName);
      if (currentField == null) {
        Map<String, String> aliases = fieldAliases.get(source.getName());
        currentField = source.getField(aliases.get(fieldName));
      }
      if (field == null) {
        field = currentField;
      } else {
        if (field.getType() != currentField.getType() ||
            field.getObjectClass() != currentField.getObjectClass() ||
            field.getObjectSerialization() != currentField.getObjectSerialization()) {
          return false;
        }
      }
    }
    return true;

View Full Code Here

        throw new TupleMRException("Can't sort by field '" + sortElement.getName()
            + "' since its type differs among sources");
      }


      if (sortElement.getCustomComparator() != null) {
        Field field = firstSchema.getField(sortElement.getName());
        if (field.getType() != Type.OBJECT) {
          throw new TupleMRException("Not allowed to specify custom comparator for type=" + field.getType());
        }
      }
    }
    // group by fields need to be a prefix of sort by fields
    for (String groupField : groupByFields) {

View Full Code Here

      if (!Schema.containsFieldUsingAlias(schema, e.getName(), aliases)) {
        throw new TupleMRException("Source '" + schemaName + "' doesn't contain field '"
            + e.getName());
      }
      if (e.getCustomComparator() != null) {
        Field field = schema.getField(e.getName());
        if (field == null) {
          field = schema.getField(aliases.get(e.getName()));
        }
        if (field.getType() != Type.OBJECT) {
          throw new TupleMRException("Not allowed to set custom comparator for type=" + field.getType());
        }
      }
    }


    for (SortElement e : ordering.getElements()) {

View Full Code Here

    // calculate a lookup table for backwards compatibility
    // "UNUSED" will mean the field is not used anymore
    backwardsCompatibiltyLookupVector = new int[readSchema.getFields().size()];
    for(int i = 0; i < readSchema.getFields().size(); i++) {
      backwardsCompatibiltyLookupVector[i] = UNUSED;
      Field field = readSchema.getFields().get(i);
      if(targetSchema.containsField(field.getName())) {
        backwardsCompatibiltyLookupVector[i] = targetSchema.getFieldPos(field.getName());
      }
    }
    for(int i = 0; i < targetSchema.getFields().size(); i++) {
      Field field = targetSchema.getFields().get(i);
      if(!readSchema.containsField(field.getName())) {
        newFields.add(field);
      }
    }


    deserializers = SerializationInfo.getDeserializers(readSchema, targetSchema, conf);

View Full Code Here

    }


    // Field by field deserialization
    for(int index = 0; index < schema.getFields().size(); index++) {
      Deserializer customDeser = customDeserializers[index];
      Field field = schema.getField(index);


      // Nulls control
      if(field.isNullable() && nullsAbsolute.flags[index]) {
        // Null field. Nothing to deserialize.
        continue;
      }


      /*
       * If we configured the Deserializer to use two Schemas,
       * this will give us the real index for the destination Tuple.
       * If it gives "UNUSED" it means the field being read is not used.
       * We will deal with this depending on whether we read a primitive field or
       * a complex data type.
       */
      int idx = backwardsCompatibleIndex(index);


      switch(field.getType()) {
      case INT:
        int iVal = WritableUtils.readVInt(input);
        if(idx != UNUSED) {
          tuple.set(idx, iVal);
        } // If the primitive field is not used we just don't set it
        break;
      case LONG:
        long lVal = WritableUtils.readVLong(input);
        if(idx != UNUSED) {
          tuple.set(idx, lVal);
        } // If the primitive field is not used we just don't set it
        break;
      case DOUBLE:
        double dVal = input.readDouble();
        if(idx != UNUSED) {
          tuple.set(idx, dVal);
        } // If the primitive field is not used we just don't set it
        break;
      case FLOAT:
        float fVal = input.readFloat();
        if(idx != UNUSED) {
          tuple.set(idx, fVal);
        } // If the primitive field is not used we just don't set it
        break;
      case STRING:
        if(idx == UNUSED) {
          // The field is unused so we use a private cached Tuple for skipping its bytes
          readUtf8(input, cachedReadTuple(), index);
        } else {
          readUtf8(input, tuple, idx);
        }
        break;
      case BOOLEAN:
        byte b = input.readByte();
        if(idx != UNUSED) {
          tuple.set(idx, (b != 0));
        } // If the primitive field is not used we just don't set it
        break;
      case ENUM:
        if(idx == UNUSED) {
          // The field is unused so we use a private cached Tuple for skipping its bytes
          readEnum(input, cachedReadTuple(), field.getObjectClass(), index);
        } else {
          readEnum(input, tuple, field.getObjectClass(), idx);
        }
        break;
      case BYTES:
        if(idx == UNUSED) {
          // The field is unused so we use a private cached Tuple for skipping its bytes
          readBytes(input, cachedReadTuple(), index);
        } else {
          readBytes(input, tuple, idx);
        }
        break;
      case OBJECT:
        if(idx == UNUSED) {
          // The field is unused so we use a private cached Tuple for skipping its bytes
          readCustomObject(input, cachedReadTuple(), field.getObjectClass(), index, customDeser);
        } else {
          readCustomObject(input, tuple, field.getObjectClass(), idx, customDeser);
        }
        break;
      default:
        throw new IOException("Not supported type:" + field.getType());
      }
    }
  }

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of com.datasalt.pangool.io.Schema.Field

com.datasalt.pangool.BaseTest

com.datasalt.pangool.examples.avro.AvroCustomSerializationJob

com.datasalt.pangool.examples.avro.AvroTopicalWordCount

com.datasalt.pangool.examples.avro.AvroTweetsJoin

com.datasalt.pangool.io.Fields

com.datasalt.pangool.io.Tuple

com.datasalt.pangool.tuplemr.mapred.lib.input.TupleTextInputFormat$TupleTextInputReader

com.datasalt.pangool.tuplemr.mapred.SortComparator

com.datasalt.pangool.tuplemr.mapred.TestComparators

com.datasalt.pangool.tuplemr.serialization.SimpleTupleDeserializer

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.