Package org.apache.hcatalog.data.schema

Examples of org.apache.hcatalog.data.schema.HCatFieldSchema


  protected HCatSchema convertPigSchemaToHCatSchema(Schema pigSchema, HCatSchema tableSchema) throws FrontendException{

    List<HCatFieldSchema> fieldSchemas = new ArrayList<HCatFieldSchema>(pigSchema.size());
    for(FieldSchema fSchema : pigSchema.getFields()){
      byte type = fSchema.type;
      HCatFieldSchema hcatFSchema;

      try {

        // Find out if we need to throw away the tuple or not.
        if(type == DataType.BAG && removeTupleFromBag(tableSchema, fSchema)){
          List<HCatFieldSchema> arrFields = new ArrayList<HCatFieldSchema>(1);
          arrFields.add(getHCatFSFromPigFS(fSchema.schema.getField(0).schema.getField(0), tableSchema));
          hcatFSchema = new HCatFieldSchema(fSchema.alias, Type.ARRAY, new HCatSchema(arrFields), null);
      }
      else{
          hcatFSchema = getHCatFSFromPigFS(fSchema, tableSchema);
      }
      fieldSchemas.add(hcatFSchema);
View Full Code Here


    byte type = fSchema.type;
    switch(type){

    case DataType.CHARARRAY:
    case DataType.BIGCHARARRAY:
      return new HCatFieldSchema(fSchema.alias, Type.STRING, null);

    case DataType.INTEGER:
      return new HCatFieldSchema(fSchema.alias, Type.INT, null);

    case DataType.LONG:
      return new HCatFieldSchema(fSchema.alias, Type.BIGINT, null);

    case DataType.FLOAT:
      return new HCatFieldSchema(fSchema.alias, Type.FLOAT, null);

    case DataType.DOUBLE:
      return new HCatFieldSchema(fSchema.alias, Type.DOUBLE, null);

    case DataType.BAG:
      Schema bagSchema = fSchema.schema;
      List<HCatFieldSchema> arrFields = new ArrayList<HCatFieldSchema>(1);
      arrFields.add(getHCatFSFromPigFS(bagSchema.getField(0), hcatTblSchema));
      return new HCatFieldSchema(fSchema.alias, Type.ARRAY, new HCatSchema(arrFields), "");

    case DataType.TUPLE:
      List<String> fieldNames = new ArrayList<String>();
      List<HCatFieldSchema> hcatFSs = new ArrayList<HCatFieldSchema>();
      for( FieldSchema fieldSchema : fSchema.schema.getFields()){
        fieldNames.add( fieldSchema.alias);
        hcatFSs.add(getHCatFSFromPigFS(fieldSchema, hcatTblSchema));
      }
      return new HCatFieldSchema(fSchema.alias, Type.STRUCT, new HCatSchema(hcatFSs), "");

    case DataType.MAP:{
      // Pig's schema contain no type information about map's keys and
      // values. So, if its a new column assume <string,string> if its existing
      // return whatever is contained in the existing column.
      HCatFieldSchema mapField = getTableCol(fSchema.alias, hcatTblSchema);
      HCatFieldSchema valFS;
      List<HCatFieldSchema> valFSList = new ArrayList<HCatFieldSchema>(1);

      if(mapField != null){
        Type mapValType = mapField.getMapValueSchema().get(0).getType();

        switch(mapValType){
        case STRING:
        case BIGINT:
        case INT:
        case FLOAT:
        case DOUBLE:
          valFS = new HCatFieldSchema(fSchema.alias, mapValType, null);
          break;
        default:
          throw new FrontendException("Only pig primitive types are supported as map value types.", PigHCatUtil.PIG_EXCEPTION_CODE);
        }
        valFSList.add(valFS);
        return new HCatFieldSchema(fSchema.alias,Type.MAP,Type.STRING, new HCatSchema(valFSList),"");
      }

      // Column not found in target table. Its a new column. Its schema is map<string,string>
      valFS = new HCatFieldSchema(fSchema.alias, Type.STRING, "");
      valFSList.add(valFS);
      return new HCatFieldSchema(fSchema.alias,Type.MAP,Type.STRING, new HCatSchema(valFSList),"");
     }

    default:
      throw new FrontendException("Unsupported type: "+type+"  in Pig's schema", PigHCatUtil.PIG_EXCEPTION_CODE);
    }
View Full Code Here

      //      }
      //      return innerList;
    case ARRAY:
      // Unwrap the bag.
      DataBag pigBag = (DataBag)pigObj;
      HCatFieldSchema tupFS = hcatFS.getArrayElementSchema().get(0);
      boolean needTuple = tupFS.getType() == Type.STRUCT;
      List<Object> bagContents = new ArrayList<Object>((int)pigBag.size());
      Iterator<Tuple> bagItr = pigBag.iterator();

      while(bagItr.hasNext()){
        // If there is only one element in tuple contained in bag, we throw away the tuple.
View Full Code Here

    for(FieldSchema pigField : pigSchema.getFields()){
      byte type = pigField.type;
      String alias = pigField.alias;
      validateAlias(alias);
      HCatFieldSchema hcatField = getTableCol(alias, tblSchema);

      if(DataType.isComplex(type)){
        switch(type){

        case DataType.MAP:
          if(hcatField != null){
            if(hcatField.getMapKeyType() != Type.STRING){
              throw new FrontendException("Key Type of map must be String "+hcatField,  PigHCatUtil.PIG_EXCEPTION_CODE);
            }
            if(hcatField.getMapValueSchema().get(0).isComplex()){
              throw new FrontendException("Value type of map cannot be complex" + hcatField, PigHCatUtil.PIG_EXCEPTION_CODE);
            }
          }
          break;

        case DataType.BAG:
          // Only map is allowed as complex type in tuples inside bag.
          for(FieldSchema innerField : pigField.schema.getField(0).schema.getFields()){
            if(innerField.type == DataType.BAG || innerField.type == DataType.TUPLE) {
              throw new FrontendException("Complex types cannot be nested. "+innerField, PigHCatUtil.PIG_EXCEPTION_CODE);
            }
            validateAlias(innerField.alias);
          }
          if(hcatField != null){
            // Do the same validation for HCatSchema.
            HCatFieldSchema arrayFieldScehma = hcatField.getArrayElementSchema().get(0);
            Type hType = arrayFieldScehma.getType();
            if(hType == Type.STRUCT){
              for(HCatFieldSchema structFieldInBag : arrayFieldScehma.getStructSubSchema().getFields()){
                if(structFieldInBag.getType() == Type.STRUCT || structFieldInBag.getType() == Type.ARRAY){
                  throw new FrontendException("Nested Complex types not allowed "+ hcatField, PigHCatUtil.PIG_EXCEPTION_CODE);
                }
              }
            }
            if(hType == Type.MAP){
              if(arrayFieldScehma.getMapKeyType() != Type.STRING){
                throw new FrontendException("Key Type of map must be String "+hcatField, PigHCatUtil.PIG_EXCEPTION_CODE);
              }
              if(arrayFieldScehma.getMapValueSchema().get(0).isComplex()){
                throw new FrontendException("Value type of map cannot be complex "+hcatField, PigHCatUtil.PIG_EXCEPTION_CODE);
              }
            }
            if(hType == Type.ARRAY) {
              throw new FrontendException("Arrays cannot contain array within it. "+hcatField, PigHCatUtil.PIG_EXCEPTION_CODE);
View Full Code Here

      partKeys = partitionSchema.getFields();
      if (partKeys.size() != partitionValues.size()) {
        throw new IllegalArgumentException("Partition key size differs from partition value size");
      }
      for (int i = 0; i < partKeys.size(); ++i) {
        HCatFieldSchema partKey = partKeys.get(i);
        if (partKey.getType() != HCatFieldSchema.Type.STRING) {
          throw new IllegalArgumentException("Partition key type string is only supported");
        }
        partSpec.put(partKey.getName(), partitionValues.get(i));
      }
    }
    StorerInfo storerInfo = new StorerInfo(isdname, osdname, new Properties());
    HCatTableInfo outputInfo = HCatTableInfo.getOutputTableInfo(null, null, dbname, tablename,
        partSpec);
View Full Code Here

    // we simulate the tuple by putting the single field in a tuple
    ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1];
    bagSubFieldSchemas[0] = new ResourceFieldSchema().setName("innertuple")
      .setDescription("The tuple in the bag")
      .setType(DataType.TUPLE);
    HCatFieldSchema arrayElementFieldSchema = hfs.getArrayElementSchema().get(0);
    if(arrayElementFieldSchema.getType() == Type.STRUCT) {
      bagSubFieldSchemas[0].setSchema(getTupleSubSchema(arrayElementFieldSchema));
    } else {
      ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1];
      innerTupleFieldSchemas[0] = new ResourceFieldSchema().setName("innerfield")
        .setDescription("The inner field in the tuple in the bag")
View Full Code Here

  public static DataBag transformToBag(List<? extends Object> list, HCatFieldSchema hfs) throws Exception {
    if (list == null){
      return null;
    }

    HCatFieldSchema elementSubFieldSchema = hfs.getArrayElementSchema().getFields().get(0);
    if (elementSubFieldSchema.getType() == Type.STRUCT){
      DataBag db = new DefaultDataBag();
      for (Object o : list){
        db.add(transformToTuple((List<Object>)o,elementSubFieldSchema));
      }
      return db;
View Full Code Here

      }
  }

  private static void validateIsPigCompatibleArrayWithPrimitivesOrSimpleComplexTypes(
          HCatFieldSchema hfs) throws IOException {
      HCatFieldSchema subFieldSchema = hfs.getArrayElementSchema().getFields().get(0);
      if (subFieldSchema.getType() == Type.STRUCT){
          validateIsPigCompatibleStructWithPrimitives(subFieldSchema);
      }else if (subFieldSchema.getType() == Type.MAP) {
          validateIsPigCompatiblePrimitive(subFieldSchema.getMapValueSchema().getFields().get(0));
      }else {
          validateIsPigCompatiblePrimitive(subFieldSchema);
      }
  }
View Full Code Here

      String colName = e.getKey();
      String hfn = colName.toLowerCase();
      key.set(hfn);
      String javaColType = colTypesJava.get(key).toString();
      int sqlType = ((IntWritable) colTypesSql.get(key)).get();
      HCatFieldSchema field =
        hCatFullTableSchema.get(hfn);
      HCatFieldSchema.Type fieldType = field.getType();
      Object hCatVal =
        hcr.get(hfn, hCatFullTableSchema);
      String hCatTypeString = field.getTypeString();
      Object sqlVal = convertToSqoop(hCatVal, fieldType,
        javaColType, hCatTypeString);
      if (debugHCatExportMapper) {
        LOG.debug("hCatVal " + hCatVal + " of type "
          + (hCatVal == null ? null : hCatVal.getClass().getName())
View Full Code Here

    LOG.info("HCatalog table partitioning key fields = "
      + Arrays.toString(hCatPartitionSchema.getFieldNames().toArray()));

    List<HCatFieldSchema> outputFieldList = new ArrayList<HCatFieldSchema>();
    for (String col : dbColumnNames) {
      HCatFieldSchema hfs = hCatFullTableSchema.get(col);
      if (hfs == null) {
        throw new IOException("Database column " + col + " not found in "
          + " hcatalog table.");
      }
      if (hCatStaticPartitionKey != null
View Full Code Here

TOP

Related Classes of org.apache.hcatalog.data.schema.HCatFieldSchema

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by Oracle Inc. Contact coftware#gmail.com.