Package com.datasalt.pangool.io

Examples of com.datasalt.pangool.io.Schema


     * IMPORTANT: The group schema must inherit containsNullableFields() from the
     * common schema. Otherwise, code that uses the group schema for
     * deserialization (like comparators) would fail, because in some cases the
     * group schema may have no nullable fields while the common schema does.
     */
    this.groupSchema = new Schema("group", groupFields) {
      boolean containsNulls = commonSchema.containsNullableFields();

      @Override
      public boolean containsNullableFields() {
        return containsNulls;
View Full Code Here


  private void initSpecificSchemaSerialization() {
    specificSerializers = new ArrayList<Serializer[]>();
    specificDeserializers = new ArrayList<Deserializer[]>();
    for (int i = 0; i < specificSchemas.size(); i++) {
      Schema specificSchema = specificSchemas.get(i);
      //TODO Should SerializationInfo contain Configuration ?
      specificSerializers.add(getSerializers(specificSchema, null));
      specificDeserializers.add(getDeserializers(specificSchema, null));
    }
  }
View Full Code Here

      fieldsToPartition.add(posFields);
    }
  }

  private void calculateOneIntermediateCommonSchema() throws TupleMRException {
    Schema intermediateSchema = mrConfig.getIntermediateSchemas().get(0);
    Criteria commonSortCriteria = mrConfig.getCommonCriteria();
    List<Field> commonFields = new ArrayList<Field>();
    for (SortElement sortElement : commonSortCriteria.getElements()) {
      String fieldName = sortElement.getName();
      Field field = checkFieldInAllSchemas(fieldName);
      commonFields.add(Field.cloneField(field, fieldName));
    }

    // adding the rest
    for (Field field : intermediateSchema.getFields()) {
      Map<String, String> aliases = mrConfig.getFieldAliases(intermediateSchema.getName());
      if (!containsField(field.getName(), commonFields, aliases)) {
        commonFields.add(field);
      }
    }
    this.commonSchema = new Schema("common", commonFields);
  }
View Full Code Here

      Field field = checkFieldInAllSchemas(fieldName);

      commonFields.add(Field.cloneField(field, fieldName));
    }

    this.commonSchema = new Schema("common", commonFields);
    this.specificSchemas = new ArrayList<Schema>();
    List<List<Field>> specificFieldsBySource = new ArrayList<List<Field>>();

    for (int schemaId = 0; schemaId < mrConfig.getNumIntermediateSchemas(); schemaId++) {
      Criteria specificCriteria = mrConfig.getSpecificOrderBys().get(schemaId);
      List<Field> specificFields = new ArrayList<Field>();
      if (specificCriteria != null) {
        for (SortElement sortElement : specificCriteria.getElements()) {
          String fieldName = sortElement.getName();
          Field field = checkFieldInSchema(fieldName, schemaId);
          specificFields.add(Field.cloneField(field, fieldName));
        }
      }
      specificFieldsBySource.add(specificFields);
    }

    for (int i = 0; i < mrConfig.getNumIntermediateSchemas(); i++) {
      Schema sourceSchema = mrConfig.getIntermediateSchema(i);
      List<Field> specificFields = specificFieldsBySource.get(i);
      for (Field field : sourceSchema.getFields()) {
        Map<String, String> sourceAliases = mrConfig.getFieldAliases(sourceSchema.getName());
        if (!containsField(field.getName(), commonSchema.getFields(), sourceAliases)
            && !containsField(field.getName(), specificFields, sourceAliases)) {
          specificFields.add(field);
        }
      }
      this.specificSchemas.add(new Schema("specific", specificFields));
    }
    this.specificSchemas = Collections.unmodifiableList(this.specificSchemas);
  }
View Full Code Here

    return field;
  }

  private Field checkFieldInSchema(String fieldName, int schemaId)
      throws TupleMRException {
    Schema schema = mrConfig.getIntermediateSchema(schemaId);
    Field field = getFieldUsingAliases(schema, fieldName);
    if (field == null) {
      throw new TupleMRException("Field '" + fieldName + "' not present in source '"
          + schema.getName() + "' " + schema);
    }
    return field;
  }
View Full Code Here

    return groupSchema;
  }

  private void calculateIndexTranslations() {
    for (int schemaId = 0; schemaId < mrConfig.getIntermediateSchemas().size(); schemaId++) {
      Schema sourceSchema = mrConfig.getIntermediateSchema(schemaId);
      commonToIntermediateIndexes.add(getIndexTranslation(commonSchema, sourceSchema));
      groupToIntermediateIndexes.add(getIndexTranslation(groupSchema, sourceSchema));
      if (specificSchemas != null && !specificSchemas.isEmpty()) {
        Schema particularSchema = specificSchemas.get(schemaId);
        specificToIntermediateIndexes.add(getIndexTranslation(particularSchema,
            sourceSchema));
      }
    }
    commonToIntermediateIndexes = Collections
View Full Code Here

    // Configure schema, sort and group by
    List<Field> fields = new ArrayList<Field>();
    fields.add(Field.create("first",Type.INT));
    fields.add(Field.create("second",Type.INT));
   
    Schema schema = new Schema("my_schema",fields);
    TupleMRBuilder builder = new TupleMRBuilder(conf);
    builder.addIntermediateSchema(schema);
    builder.setGroupByFields("first");
    builder.setOrderBy(new OrderBy().add("first",Order.ASC).add("second",Order.ASC));
    // Input / output and such
View Full Code Here

* "timestamp", "ip"].
*/
public class UrlResolution extends BaseExampleJob {

  static Schema getURLRegisterSchema() {
    return new Schema("urlRegister", Fields.parse("url:string, timestamp:long, ip:string"));
  }
View Full Code Here

  static Schema getURLRegisterSchema() {
    return new Schema("urlRegister", Fields.parse("url:string, timestamp:long, ip:string"));
  }

  static Schema getURLMapSchema() {
    return new Schema("urlMap", Fields.parse("nonCanonicalUrl:string, canonicalUrl:string"));
  }
View Full Code Here

    List<Field> fields = new ArrayList<Field>();
    fields.add(Field.create("url", Type.STRING));
    fields.add(Field.create("date", Type.STRING));
    fields.add(Field.create("visits",Type.INT));

    Schema schema = new Schema("my_schema", fields);

    TupleMRBuilder mr = new TupleMRBuilder(conf);
    mr.addIntermediateSchema(schema);
    mr.setGroupByFields("url");
    mr.setOrderBy(new OrderBy().add("url", Order.ASC).add("date", Order.ASC));
View Full Code Here

TOP

Related Classes of com.datasalt.pangool.io.Schema

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.