Package com.datasalt.pangool.io.Schema

Examples of com.datasalt.pangool.io.Schema.Field


  }

  private boolean fieldSameTypeInAllSources(String field) {
    Type type = null;
    for(Schema source : schemas) {
      Field f = source.getField(field);
      if(type == null) {
        type = f.getType();
      } else if(type != f.getType()) {
        return false;
      }
    }
    return true;
  }
View Full Code Here


  public static String toString(ITuple tuple) {
    Schema schema = tuple.getSchema();
    StringBuilder b = new StringBuilder();
    b.append("{");
    for(int i = 0; i < schema.getFields().size(); i++) {
      Field f = schema.getField(i);
      if(i != 0) {
        b.append(",");
      }
      b.append("\"").append(f.getName()).append("\"").append(":");
      switch(f.getType()) {
      case INT:
      case LONG:
      case FLOAT:
      case DOUBLE:
      case BOOLEAN:
        b.append(tuple.get(i));
        break;
      case STRING:
      case ENUM:
        b.append("\"").append(tuple.get(i)).append("\"");
        break;
      case OBJECT:
        b.append("{").append(tuple.get(i)).append("}");
        break;
      default:
        throw new PangoolRuntimeException("Not stringifiable type :" + f.getType());
      }
    }
    b.append("}");
    return b.toString();
  }
View Full Code Here

      }
    }
  }

  private static Schema getPangoolTweetSchema() {
    Field tweetIdField = Field.create("tweet_id", Schema.Field.Type.INT);
    Field tweetHashTags = Fields.createAvroField("tweet_hashtags", getAvroStringArraySchema(), false);
    return new Schema("tweet", Arrays.asList(tweetIdField, tweetHashTags));
  }
View Full Code Here

    Field tweetHashTags = Fields.createAvroField("tweet_hashtags", getAvroStringArraySchema(), false);
    return new Schema("tweet", Arrays.asList(tweetIdField, tweetHashTags));
  }

  private static Schema getPangoolRetweetSchema() {
    Field userId = Field.create("username", Schema.Field.Type.STRING);
    Field tweetId = Field.create("tweet_id", Schema.Field.Type.INT);
    return new Schema("retweet", Arrays.asList(userId, tweetId));
  }
View Full Code Here

  /**
   * Creates an instance, passing the command-line usage message to the
   * superclass constructor.
   */
  public AvroTopicalWordCount() {
    super("Usage: AvroTopicalWordCount [input_path] [output_path]");
  }

  static Schema getSchema() {
    Field avroField = Fields.createAvroField("my_avro", getAvroSchema(), false);
    return new Schema("schema", Arrays.asList(avroField));
  }
View Full Code Here

            }
            String currentValue = "";
            try {
              currentValue = readLine == null ? null : readLine.get(index);
              if(currentValue != null) {
                Field field = schema.getFields().get(i);
                switch(field.getType()) {
                case INT:
                case LONG:
                case FLOAT:
                case DOUBLE:
                  processNumber(field.getType(), tuple, i, currentValue);
                  break;
                case ENUM:
                  Class clazz = field.getObjectClass();
                  tuple.set(i, Enum.valueOf(clazz, currentValue.trim()));
                  break;
                case STRING:
                  if(type == InputType.CSV) {
                    tuple.set(i, currentValue);
View Full Code Here

   *                   or deserialized
   * @return
   */
  public static Field createAvroField(String name,
                                      org.apache.avro.Schema avroSchema, boolean isReflect) {
    Field field = Field.createObject(name, Object.class);
    field.setObjectSerialization(AvroFieldSerialization.class);
    field.addProp("avro.schema", avroSchema.toString());
    field.addProp("avro.reflection", Boolean.toString(isReflect));
    return field;
  }
View Full Code Here

  public static String toString(ITuple tuple) {
    Schema schema = tuple.getSchema();
    StringBuilder b = new StringBuilder();
    b.append("{");
    for (int i = 0; i < schema.getFields().size(); i++) {
      Field f = schema.getField(i);
      if (i != 0) {
        b.append(",");
      }
      b.append("\"").append(f.getName()).append("\"").append(":");
      if (tuple.get(i) == null) {
        b.append("null");
        continue;
      }
      switch (f.getType()) {
        case INT:
        case LONG:
        case FLOAT:
        case DOUBLE:
        case BOOLEAN:
          b.append(tuple.get(i));
          break;
        case STRING:
        case ENUM:

          b.append("\"").append(tuple.get(i)).append("\"");
          break;
        case OBJECT:
          b.append("{").append(tuple.get(i).toString()).append("}");
          break;
        case BYTES:
          Object o = tuple.get(i);
          b.append("{\"bytes\": \"");
          byte[] bytes;
          int offset, length;
          if (o instanceof ByteBuffer) {
            ByteBuffer byteBuffer = (ByteBuffer) o;
            bytes = byteBuffer.array();
            offset = byteBuffer.arrayOffset() + byteBuffer.position();
            length = byteBuffer.limit() - byteBuffer.position();
          } else {
            //byte[]
            bytes = (byte[]) o;
            offset = 0;
            length = bytes.length;
          }
          for (int p = offset; p < offset + length; p++) {
            b.append((char) bytes[p]);
          }
          b.append("\"}");
          break;
        default:
          throw new PangoolRuntimeException("Not stringifiable type :" + f.getType());
      }
    }
    b.append("}");
    return b.toString();
  }
View Full Code Here

   */
  public static Tuple deepCopy(ITuple tuple, Map<String, FieldClonator> customClonators) {
    Schema schema = tuple.getSchema();
    Tuple newTuple = new Tuple(schema);
    for (int i = 0; i < schema.getFields().size(); i++) {
      Field field = schema.getField(i);

      Object value = tuple.get(i);
      if (value == null) {
        continue;
      }

      if (customClonators != null && customClonators.containsKey(field.getName())) {
        // There is a custom clonator
        newTuple.set(i, customClonators.get(field.getName()).giveMeACopy(value));
        continue;
      }

      switch (field.getType()) {
        case BYTES:
          if (value instanceof ByteBuffer) {
            newTuple.set(i, cloneByteBuffer((ByteBuffer) value));
          } else if (value instanceof byte[]) {
            newTuple.set(i, Arrays.copyOf((byte[]) value, ((byte[]) value).length));
          } else {
            throw new IllegalArgumentException("Field " + field.getName() + " of type " + field.getType()
                + " cannot contains values of class " + value.getClass().getCanonicalName());
          }
          break;
        case OBJECT:
          if (value instanceof ITuple) {
            throw new IDontKnowHowToCopyThisStuff("Tuples inside tuples requires a custom FieldClonator" +
                "to perform the copy. Please, provide a custom FieldClonator for field " + field.getName()
                + ". It usually is as simple as create one that calls the deepCopy method for the " +
                "inner tuple");
          } else {
            throw new IDontKnowHowToCopyThisStuff("I don't know how to copy the field " + field.getName()
                + " with type " + value.getClass().getCanonicalName() + ". Please, provide a custom " +
                "FieldClonator for this field in order to be able to perform deep copies");
          }
        case STRING:
          if (value instanceof String) {
            newTuple.set(i, tuple.get(i));
          } else if (value instanceof Utf8 || value instanceof Text) {
            newTuple.set(i, new Utf8(value.toString()));
          } else {
            throw new IllegalArgumentException("Field " + field.getName() + " of type " + field.getType()
                + " cannot contains values of class " + value.getClass().getCanonicalName());
          }
          break;
        default:
          newTuple.set(i, tuple.get(i));
View Full Code Here

  }

  public static Serializer[] getSerializers(Schema schema, Configuration conf) {
    Serializer[] result = new Serializer[schema.getFields().size()];
    for (int i = 0; i < result.length; i++) {
      Field field = schema.getField(i);
      if (field.getObjectSerialization() != null) {
        Serialization serialization = ReflectionUtils.newInstance(field.getObjectSerialization(), conf);
        if (serialization instanceof FieldConfigurable) {
          ((FieldConfigurable) serialization).setFieldProperties(null, field.getProps());
        }
        result[i] = serialization.getSerializer(field.getObjectClass());
      }
    }
    return result;
  }
View Full Code Here

TOP

Related Classes of com.datasalt.pangool.io.Schema.Field

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.