Package com.datasalt.pangool.io.Schema

Examples of com.datasalt.pangool.io.Schema.Field


   * @throws IOException
   */
  private void write(Schema destinationSchema, ITuple tuple,
      int[] translationTable, DataOutput output,FieldSerializer[] customSerializers) throws IOException {
    for (int i = 0; i < destinationSchema.getFields().size(); i++) {
      Field field = destinationSchema.getField(i);
      Type fieldType = field.getType();
      Object element = tuple.get(translationTable[i]);
      try {
        switch(fieldType){
        case INT:
          WritableUtils.writeVInt(output, (Integer) element); break;
View Full Code Here


  public void readFields(ITuple tuple, DataInputStream input,FieldDeserializer[] customDeserializers)
      throws IOException {
    Schema schema = tuple.getSchema();
    for(int index = 0; index < schema.getFields().size(); index++) {
      FieldDeserializer customDeser = customDeserializers[index];
      Field field = schema.getField(index);
      switch(field.getType()){
      case INT:  tuple.set(index,WritableUtils.readVInt(input)); break;
      case LONG: tuple.set(index,WritableUtils.readVLong(input)); break;
      case DOUBLE: tuple.set(index,input.readDouble()); break;
      case FLOAT: tuple.set(index,input.readFloat()); break;
      case STRING: readUtf8(input,tuple,index); break;
      case BOOLEAN:
        byte b = input.readByte();
        tuple.set(index,(b != 0));
        break;
      case ENUM: readEnum(input,tuple,field.getObjectClass(),index); break;
      case BYTES: readBytes(input,tuple,index); break;
      case OBJECT: readCustomObject(input,tuple,field.getObjectClass(),index,customDeser); break;
      default:
        throw new IOException("Not supported type:" + field.getType());
      }
    }
  }
View Full Code Here

  }

  public static FieldSerializer[] getSerializers(Schema schema){
    FieldSerializer[] result = new FieldSerializer[schema.getFields().size()];
    for (int i= 0 ; i < result.length; i++){
      Field field = schema.getField(i);
      if (field.getSerializerClass() != null){
        FieldSerializer ser = ReflectionUtils.newInstance(field.getSerializerClass(),null);
        ser.setProps(field.getProps());
        result[i] = ser;
      }
    }
    return result;
  }
View Full Code Here

  }
 
  public static FieldDeserializer[] getDeserializers(Schema schema){
    FieldDeserializer[] result = new FieldDeserializer[schema.getFields().size()];
    for (int i= 0 ; i < result.length; i++){
      Field field = schema.getField(i);
      if (field.getSerializerClass() != null){
        FieldDeserializer ser = ReflectionUtils.newInstance(field.getDeserializerClass(),null);
        ser.setProps(field.getProps());
        result[i] = ser;
      }
    }
    return result;
  }
View Full Code Here

    Schema intermediateSchema = mrConfig.getIntermediateSchemas().get(0);
    Criteria commonSortCriteria = mrConfig.getCommonCriteria();
    List<Field> commonFields = new ArrayList<Field>();
    for(SortElement sortElement : commonSortCriteria.getElements()) {
      String fieldName = sortElement.getName();
      Field field = checkFieldInAllSchemas(fieldName);
      commonFields.add(Field.cloneField(field, fieldName));
    }

    // adding the rest
    for(Field field : intermediateSchema.getFields()) {
      Map<String,String> aliases = mrConfig.getFieldAliases(intermediateSchema.getName());
      if(!containsField(field.getName(),commonFields,aliases))  {
          commonFields.add(field);
      }
    }
    this.commonSchema = new Schema("common", commonFields);
  }
View Full Code Here

  private void calculateMultipleSourcesSubSchemas() throws TupleMRException {
    Criteria commonSortCriteria = mrConfig.getCommonCriteria();
    List<Field> commonFields = new ArrayList<Field>();
    for(SortElement sortElement : commonSortCriteria.getElements()) {
      String fieldName = sortElement.getName();
      Field field = checkFieldInAllSchemas(fieldName);
     
      commonFields.add(Field.cloneField(field,fieldName));
    }

    this.commonSchema = new Schema("common", commonFields);
    this.specificSchemas = new ArrayList<Schema>();
    List<List<Field>> specificFieldsBySource = new ArrayList<List<Field>>();

    for(int schemaId = 0; schemaId < mrConfig.getNumIntermediateSchemas(); schemaId++) {
      Criteria specificCriteria = mrConfig.getSpecificOrderBys().get(schemaId);
      List<Field> specificFields = new ArrayList<Field>();
      if(specificCriteria != null) {
        for(SortElement sortElement : specificCriteria.getElements()) {
          String fieldName = sortElement.getName();
          Field field = checkFieldInSchema(fieldName, schemaId);
          specificFields.add(Field.cloneField(field,fieldName));
        }
      }
      specificFieldsBySource.add(specificFields);
    }

    for(int i = 0; i < mrConfig.getNumIntermediateSchemas(); i++) {
      Schema sourceSchema = mrConfig.getIntermediateSchema(i);
      List<Field> specificFields = specificFieldsBySource.get(i);
      for(Field field : sourceSchema.getFields()) {
        Map<String,String> sourceAliases = mrConfig.getFieldAliases(sourceSchema.getName());
        if(!containsField(field.getName(),commonSchema.getFields(),sourceAliases)
            && !containsField(field.getName(), specificFields,sourceAliases)) {
          specificFields.add(field);
        }
      }
      this.specificSchemas.add(new Schema("specific", specificFields));
    }
View Full Code Here

    }
    return false;
  }

  private Field checkFieldInAllSchemas(String name) throws TupleMRException {
    Field field = null;
    for(int i = 0; i < mrConfig.getIntermediateSchemas().size(); i++) {
      Field fieldInSource = checkFieldInSchema(name, i);
      if(field == null) {
        field = fieldInSource;
      } else if(field.getType() != fieldInSource.getType() || field.getObjectClass() != fieldInSource.getObjectClass()) {
        throw new TupleMRException("The type for field '" + name
            + "' is not the same in all the sources");
      }
    }
    return field;
View Full Code Here

  }

  private Field checkFieldInSchema(String fieldName, int schemaId)
      throws TupleMRException {
    Schema schema = mrConfig.getIntermediateSchema(schemaId);
    Field field = getFieldUsingAliases(schema,fieldName);
    if(field == null) {
      throw new TupleMRException("Field '" + fieldName + "' not present in source '"
          + schema.getName() + "' " + schema);
    }
    return field;
View Full Code Here

    }
    schemas.add(schema);
  }

  private boolean fieldSameTypeInAllSources(String fieldName) {
    Field field = null;
    for(Schema source : schemas) {
      Field currentField = source.getField(fieldName);
      if (currentField == null){
        Map<String,String> aliases = fieldAliases.get(source.getName());
        currentField= source.getField(aliases.get(fieldName));
      }
      if(field == null) {
        field = currentField;
      } else {
        if(field.getType() != currentField.getType() ||
          field.getObjectClass() != currentField.getObjectClass() ||
          field.getSerializerClass() != currentField.getSerializerClass() ||
          field.getDeserializerClass() != currentField.getDeserializerClass()){
          return false;
        }
      }
    }
    return true;
View Full Code Here

   
    Schema pangoolSchema = tuple.getSchema();
    for(org.apache.avro.Schema.Field avroField : avroSchema.getFields()) {
      int pos = avroField.pos();
      Object objRecord = record.get(pos);
      Field pangoolField = pangoolSchema.getField(pos);
      switch(pangoolField.getType()){
      case INT:
      case LONG:
      case BOOLEAN:
      case FLOAT:
      case DOUBLE:
        tuple.set(pos,objRecord); //very optimistic
        break;
      case STRING:{
        if (!(tuple.get(pos) instanceof Utf8)){
          tuple.set(pos,new com.datasalt.pangool.io.Utf8());
        }
        com.datasalt.pangool.io.Utf8 utf8=(com.datasalt.pangool.io.Utf8)tuple.get(pos);
        if (objRecord instanceof String){
          utf8.set((String)objRecord);
        } else if (objRecord instanceof Utf8){
          Utf8 avroUtf8 = (Utf8)objRecord;
          utf8.set(avroUtf8.getBytes(),0,avroUtf8.getByteLength());
        } else {
          throw new IOException("Not supported avro field " +
              org.apache.avro.Schema.Type.STRING + " with instance " + objRecord.getClass().getName());
        }
        break;}
      case ENUM:{
          Class clazz = pangoolField.getObjectClass();
          Enum e = Enum.valueOf(clazz,objRecord.toString());
          tuple.set(pos,e);
        break;
      }
      case BYTES:
        tuple.set(pos,objRecord); //TODO FIXME this should copy bytes really, not reference!
        break;
      case OBJECT:
        FieldDeserializer customDeser = customDeserializers[pos];
        if (objRecord instanceof byte[]){
          inputBuffer.reset((byte[])objRecord,((byte[])objRecord).length);
        } else if (objRecord instanceof ByteBuffer){
          ByteBuffer buffer = (ByteBuffer)objRecord;
          int offset = buffer.arrayOffset()+buffer.position();
          int length = buffer.limit()- buffer.position();
          inputBuffer.reset(buffer.array(),offset,length);
        } else {
          throw new PangoolRuntimeException("Can't convert to OBJECT from instance " + objRecord.getClass());
        }
        if (customDeser != null){
          customDeser.open(inputBuffer);
          tuple.set(pos,customDeser.deserialize(tuple.get(pos))); //TODO FIXME avro deserializer shouldn't reuse objects sometimes (UNION ?)
          customDeser.close(); //TODO is this ok ?
         
        } else {
            //no custom deser , then use Hadoop serializers registered in "io.serializations"
          Class clazz = pangoolField.getObjectClass();
          if(tuple.get(pos) == null || tuple.get(pos).getClass() != clazz) {
            tuple.set(pos, ReflectionUtils.newInstance(clazz, conf));
          }
          hadoopSer.deser(tuple.get(pos),inputBuffer);
        }
View Full Code Here

TOP

Related Classes of com.datasalt.pangool.io.Schema.Field

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.