Package com.datasalt.pangool.io.Schema

Examples of com.datasalt.pangool.io.Schema.Field


  }

  public void write(Schema destinationSchema, ITuple tuple, int[] translationTable, Serializer[] customSerializers)
      throws IOException {
    for(int i = 0; i < destinationSchema.getFields().size(); i++) {
      Field field = destinationSchema.getField(i);
      Type fieldType = field.getType();
      Object element;
      if(translationTable == null) {
        element = tuple.get(i);
      } else {
        element = tuple.get(translationTable[i]);
View Full Code Here


  }

  private boolean fieldSameTypeInAllSources(String field) {
    Type type = null;
    for(Schema source : schemas) {
      Field f = source.getField(field);
      if(type == null) {
        type = f.getType();
      } else if(type != f.getType()) {
        return false;
      }
    }
    return true;
  }
View Full Code Here

  public static String toString(ITuple tuple) {
    Schema schema = tuple.getSchema();
    StringBuilder b = new StringBuilder();
    b.append("{");
    for(int i = 0; i < schema.getFields().size(); i++) {
      Field f = schema.getField(i);
      if(i != 0) {
        b.append(",");
      }
      b.append("\"").append(f.getName()).append("\"").append(":");
      switch(f.getType()) {
      case INT:
      case LONG:
      case FLOAT:
      case DOUBLE:
      case BOOLEAN:
        b.append(tuple.get(i));
        break;
      case STRING:
      case ENUM:
        b.append("\"").append(tuple.get(i)).append("\"");
        break;
      case OBJECT:
        b.append("{").append(tuple.get(i)).append("}");
        break;
      default:
        throw new PangoolRuntimeException("Not stringifiable type :" + f.getType());
      }
    }
    b.append("}");
    return b.toString();
  }
View Full Code Here

    super("Usage: AvroTopicalWordCount [input_path] [output_path]");
  }

  static Schema getSchema() {
    org.apache.avro.Schema avroSchema = getAvroSchema();
    Field avroField = Field.createObject("my_avro",Object.class);
    avroField.setObjectSerialization(AvroFieldSerialization.class);
    avroField.addProp("avro.schema",avroSchema.toString());
    return new Schema("schema",Arrays.asList(avroField));
  }
View Full Code Here

      }
    }
  }

  private static Schema getPangoolTweetSchema() {
    Field tweetIdField = Field.create("tweet_id",Schema.Field.Type.INT);
    Field tweetHashTags = Field.createObject("tweet_hashtags",Array.class);
    tweetHashTags.setObjectSerialization(AvroFieldSerialization.class);
    tweetHashTags.addProp("avro.schema",getAvroStringArraySchema().toString());
    return new Schema("tweet",Arrays.asList(tweetIdField,tweetHashTags));
  }
View Full Code Here

    tweetHashTags.addProp("avro.schema",getAvroStringArraySchema().toString());
    return new Schema("tweet",Arrays.asList(tweetIdField,tweetHashTags));
  }
 
  private static Schema getPangoolRetweetSchema(){
    Field userId = Field.create("username",Schema.Field.Type.STRING);
    Field tweetId = Field.create("tweet_id",Schema.Field.Type.INT);
    return new Schema("retweet",Arrays.asList(userId,tweetId));
  }
View Full Code Here

    Criteria commonSortCriteria = mrConfig.getCommonCriteria();
    List<Field> commonFields = new ArrayList<Field>();
    for(SortElement sortElement : commonSortCriteria.getElements()) {
      String fieldName = sortElement.getName();
      Field field = checkFieldInAllSchemas(fieldName);
      commonFields.add(field);
    }

    // adding the rest
    for(Field field : sourceSchema.getFields()) {
      if(!containsFieldName(field.getName(), commonFields)) {
        commonFields.add(field);
      }
    }
    this.commonSchema = new Schema("common", commonFields);
  }
View Full Code Here

  private void calculateMultipleSourcesSubSchemas() throws TupleMRException {
    Criteria commonSortCriteria = mrConfig.getCommonCriteria();
    List<Field> commonFields = new ArrayList<Field>();
    for(SortElement sortElement : commonSortCriteria.getElements()) {
      String fieldName = sortElement.getName();
      Field field = checkFieldInAllSchemas(fieldName);
      commonFields.add(field);
    }

    this.commonSchema = new Schema("common", commonFields);
    this.specificSchemas = new ArrayList<Schema>();
    List<List<Field>> specificFieldsBySource = new ArrayList<List<Field>>();

    for(int schemaId = 0; schemaId < mrConfig.getNumIntermediateSchemas(); schemaId++) {
      Criteria specificCriteria = mrConfig.getSpecificOrderBys().get(schemaId);
      List<Field> specificFields = new ArrayList<Field>();
      if(specificCriteria != null) {
        for(SortElement sortElement : specificCriteria.getElements()) {
          String fieldName = sortElement.getName();
          Field field = checkFieldInSchema(fieldName, schemaId);
          specificFields.add(field);
        }
      }
      specificFieldsBySource.add(specificFields);
    }

    for(int i = 0; i < mrConfig.getNumIntermediateSchemas(); i++) {
      Schema sourceSchema = mrConfig.getIntermediateSchema(i);
      List<Field> specificFields = specificFieldsBySource.get(i);
      for(Field field : sourceSchema.getFields()) {
        if(!commonSchema.containsField(field.getName())
            && !containsFieldName(field.getName(), specificFields)) {
          specificFields.add(field);
        }
      }
      this.specificSchemas.add(new Schema("specific", specificFields));
View Full Code Here

    }
    return false;
  }

  private Field checkFieldInAllSchemas(String name) throws TupleMRException {
    Field field = null;
    for(int i = 0; i < mrConfig.getIntermediateSchemas().size(); i++) {
      Field fieldInSource = checkFieldInSchema(name, i);
      if(field == null) {
        field = fieldInSource;
      } else if(!field.equals(fieldInSource)) {
        throw new TupleMRException("The type for field '" + name
            + "' is not the same in all the sources");
View Full Code Here

  }

  private Field checkFieldInSchema(String fieldName, int schemaId)
      throws TupleMRException {
    Schema schema = mrConfig.getIntermediateSchema(schemaId);
    Field field = schema.getField(fieldName);
    if(field == null) {
      throw new TupleMRException("Field '" + fieldName + "' not present in source '"
          + schema.getName() + "' " + schema);
    }
    return field;
View Full Code Here

TOP

Related Classes of com.datasalt.pangool.io.Schema.Field

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.