Package org.apache.hive.hcatalog.data.schema

Examples of org.apache.hive.hcatalog.data.schema.HCatFieldSchema


            + ") for the column name are at odds");
      }
      // If we reached here, we successfully found an alternate internal
      // column mapping and can proceed.
    }
    HCatFieldSchema hcatFieldSchema = s.getFields().get(fpos);
    Object currField = extractCurrentField(p, hcatFieldSchema, false);
    r.set(fpos, currField);
  }
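The fragment above resolves the HCatFieldSchema at a given column position in the record's schema, extracts the corresponding value, and writes it into the HCatRecord at that same position. A minimal, self-contained sketch of that position-based access, with invented column names ("id", "name"):

import java.util.ArrayList;

import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;

public class RecordByPositionSketch {
  public static void main(String[] args) throws HCatException {
    // Two-column schema; the column names are made up for this sketch.
    HCatSchema schema = new HCatSchema(new ArrayList<HCatFieldSchema>());
    schema.append(new HCatFieldSchema("id", HCatFieldSchema.Type.INT, ""));
    schema.append(new HCatFieldSchema("name", HCatFieldSchema.Type.STRING, ""));

    HCatRecord record = new DefaultHCatRecord(schema.size());

    // Resolve the column position from the schema, then set/get by position,
    // mirroring r.set(fpos, currField) in the fragment above.
    int fpos = schema.getPosition("name");
    HCatFieldSchema fieldSchema = schema.getFields().get(fpos);
    record.set(fpos, "llama");

    System.out.println(fieldSchema.getName() + " (" + fieldSchema.getType() + ") = " + record.get(fpos));
  }
}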


      }
      if (valueToken != JsonToken.START_OBJECT) {
        throw new IOException("Start of Object expected");
      }
      Map<Object, Object> map = new LinkedHashMap<Object, Object>();
      HCatFieldSchema valueSchema = hcatFieldSchema.getMapValueSchema().get(0);
      while ((valueToken = p.nextToken()) != JsonToken.END_OBJECT) {
        Object k = getObjectOfCorrespondingPrimitiveType(p.getCurrentName(), hcatFieldSchema.getMapKeyTypeInfo());
        Object v;
        if (valueSchema.getType() == HCatFieldSchema.Type.STRUCT) {
          v = extractCurrentField(p, valueSchema, false);
        } else {
          v = extractCurrentField(p, valueSchema, true);
        }
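Decoding each entry of a MAP column relies on the field's key type info and its single-field value schema, exactly the two accessors used above. The sketch below builds such a field with createMapTypeFieldSchema (the column name "counts" and its comment are invented) and reads the same type information back:

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;

public class MapFieldSketch {
  public static void main(String[] args) throws HCatException {
    // Value schema of a map<string,int> column: a single, unnamed int field.
    List<HCatFieldSchema> valueFields = new ArrayList<HCatFieldSchema>();
    valueFields.add(new HCatFieldSchema(null, TypeInfoFactory.intTypeInfo, null));

    HCatFieldSchema mapField = HCatFieldSchema.createMapTypeFieldSchema(
        "counts", TypeInfoFactory.stringTypeInfo, new HCatSchema(valueFields), "per-key counters");

    // The same accessors the fragment above uses while decoding each entry.
    System.out.println("key type:   " + mapField.getMapKeyTypeInfo().getTypeName());                      // string
    System.out.println("value type: " + mapField.getMapValueSchema().get(0).getTypeInfo().getTypeName()); // int
  }
}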

    }
  }

  private HCatSchema getSchema() throws HCatException {
    HCatSchema schema = new HCatSchema(new ArrayList<HCatFieldSchema>());
    schema.append(new HCatFieldSchema("a0", HCatFieldSchema.Type.INT,
        ""));
    schema.append(new HCatFieldSchema("a1",
        HCatFieldSchema.Type.STRING, ""));
    schema.append(new HCatFieldSchema("a2",
        HCatFieldSchema.Type.STRING, ""));
    return schema;
  }
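Once a schema like the one built by getSchema() above exists, columns can be addressed by name as well as by position. A short sketch using the same a0/a1/a2 layout:

import java.util.ArrayList;

import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;

public class SchemaLookupSketch {
  public static void main(String[] args) throws HCatException {
    // Same shape as the getSchema() helper above: one INT column followed by two STRING columns.
    HCatSchema schema = new HCatSchema(new ArrayList<HCatFieldSchema>());
    schema.append(new HCatFieldSchema("a0", HCatFieldSchema.Type.INT, ""));
    schema.append(new HCatFieldSchema("a1", HCatFieldSchema.Type.STRING, ""));
    schema.append(new HCatFieldSchema("a2", HCatFieldSchema.Type.STRING, ""));

    System.out.println(schema.getFieldNames());     // [a0, a1, a2]
    System.out.println(schema.getPosition("a1"));   // 1
    System.out.println(schema.get("a1").getType()); // STRING
  }
}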

      LOG.debug("convertPigSchemaToHCatSchema(pigSchema,tblSchema)=(" + pigSchema + "," + tableSchema + ")");
    }
    List<HCatFieldSchema> fieldSchemas = new ArrayList<HCatFieldSchema>(pigSchema.size());
    for (FieldSchema fSchema : pigSchema.getFields()) {
      try {
        HCatFieldSchema hcatFieldSchema = getColFromSchema(fSchema.alias, tableSchema);
        // If writing to a partitioned table, pigSchema will have more columns than tableSchema:
        // partition columns are not part of tableSchema (e.g. TestHCatStorer#testPartColsInData()).
//        HCatUtil.assertNotNull(hcatFieldSchema, "Nothing matching '" + fSchema.alias + "' found " +
//                "in target table schema", LOG);
        fieldSchemas.add(getHCatFSFromPigFS(fSchema, hcatFieldSchema, pigSchema, tableSchema));
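getColFromSchema() above resolves each Pig alias against the target table's HCatSchema and may legitimately return nothing when the alias is a partition column. The helper below is only an illustrative stand-in for that lookup, built on HCatSchema.getPosition(); the column names are invented and this is not the actual HCatBaseStorer code:

import java.util.ArrayList;
import java.util.Arrays;

import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;

public class ColumnLookupSketch {
  // Illustrative stand-in: return the matching table column, or null when the alias
  // is not part of the table schema (e.g. a partition column).
  static HCatFieldSchema getColFromSchema(String alias, HCatSchema tableSchema) {
    Integer pos = tableSchema.getPosition(alias);
    return pos == null ? null : tableSchema.get(pos);
  }

  public static void main(String[] args) throws HCatException {
    HCatSchema tableSchema = new HCatSchema(new ArrayList<HCatFieldSchema>());
    tableSchema.append(new HCatFieldSchema("name", HCatFieldSchema.Type.STRING, ""));
    tableSchema.append(new HCatFieldSchema("age", HCatFieldSchema.Type.INT, ""));

    // "ds" stands in for a partition column that appears only on the Pig side.
    for (String alias : Arrays.asList("name", "age", "ds")) {
      HCatFieldSchema col = getColFromSchema(alias, tableSchema);
      System.out.println(alias + " -> " + (col == null ? "not in table schema" : col.getType()));
    }
  }
}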

    switch (type) {

    case DataType.CHARARRAY:
    case DataType.BIGCHARARRAY:
      if(hcatFieldSchema != null && hcatFieldSchema.getTypeInfo() != null) {
        return new HCatFieldSchema(fSchema.alias, hcatFieldSchema.getTypeInfo(), null);
      }
      return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.stringTypeInfo, null);
    case DataType.INTEGER:
      if (hcatFieldSchema != null) {
        if (!SUPPORTED_INTEGER_CONVERSIONS.contains(hcatFieldSchema.getType())) {
          throw new FrontendException("Unsupported type: " + type + "  in Pig's schema",
            PigHCatUtil.PIG_EXCEPTION_CODE);
        }
        return new HCatFieldSchema(fSchema.alias, hcatFieldSchema.getTypeInfo(), null);
      }
      return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.intTypeInfo, null);
    case DataType.LONG:
      return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.longTypeInfo, null);
    case DataType.FLOAT:
      return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.floatTypeInfo, null);
    case DataType.DOUBLE:
      return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.doubleTypeInfo, null);
    case DataType.BYTEARRAY:
      return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.binaryTypeInfo, null);
    case DataType.BOOLEAN:
      return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.booleanTypeInfo, null);
    case DataType.DATETIME:
      // Pig DATETIME can map to DATE or TIMESTAMP (see HCatBaseStorer#validateSchema());
      // which one is used is controlled by the Hive target table's column type.
      if(hcatFieldSchema != null && hcatFieldSchema.getTypeInfo() != null) {
        return new HCatFieldSchema(fSchema.alias, hcatFieldSchema.getTypeInfo(), null);
      }
      return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.timestampTypeInfo, null);
    case DataType.BIGDECIMAL:
      if(hcatFieldSchema != null && hcatFieldSchema.getTypeInfo() != null) {
        return new HCatFieldSchema(fSchema.alias, hcatFieldSchema.getTypeInfo(), null);
      }
      return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.decimalTypeInfo, null);
    case DataType.BAG:
      Schema bagSchema = fSchema.schema;
      List<HCatFieldSchema> arrFields = new ArrayList<HCatFieldSchema>(1);
      FieldSchema field;
      // Find out if we need to throw away the tuple or not.
      if (removeTupleFromBag(hcatFieldSchema, fSchema)) {
        field = bagSchema.getField(0).schema.getField(0);
      } else {
        field = bagSchema.getField(0);
      }
      arrFields.add(getHCatFSFromPigFS(field, hcatFieldSchema == null ? null : hcatFieldSchema
              .getArrayElementSchema().get(0), pigSchema, tableSchema));
      return new HCatFieldSchema(fSchema.alias, Type.ARRAY, new HCatSchema(arrFields), "");
    case DataType.TUPLE:
      List<HCatFieldSchema> hcatFSs = new ArrayList<HCatFieldSchema>();
      HCatSchema structSubSchema = hcatFieldSchema == null ? null : hcatFieldSchema.getStructSubSchema();
      List<FieldSchema> fields = fSchema.schema.getFields();
      for (int i = 0; i < fields.size(); i++) {
        FieldSchema fieldSchema = fields.get(i);
        hcatFSs.add(getHCatFSFromPigFS(fieldSchema, structSubSchema == null ? null : structSubSchema.get(i), pigSchema, tableSchema));
      }
      return new HCatFieldSchema(fSchema.alias, Type.STRUCT, new HCatSchema(hcatFSs), "");
    case DataType.MAP: {
      // Pig's schema contains no type information about a map's keys and
      // values. So, if it's a new column, assume map<string,string>; if it's an
      // existing column, return whatever the existing column contains.

      HCatFieldSchema valFS;
      List<HCatFieldSchema> valFSList = new ArrayList<HCatFieldSchema>(1);

      if (hcatFieldSchema != null) {
        return HCatFieldSchema.createMapTypeFieldSchema(fSchema.alias, hcatFieldSchema.getMapKeyTypeInfo(),
          hcatFieldSchema.getMapValueSchema(), "");
      }

      // Column not found in the target table, so it's a new column; its schema is map<string,string>.
      valFS = new HCatFieldSchema(fSchema.alias, TypeInfoFactory.stringTypeInfo, "");
      valFSList.add(valFS);
      return HCatFieldSchema.createMapTypeFieldSchema(fSchema.alias,
        TypeInfoFactory.stringTypeInfo, new HCatSchema(valFSList), "");
    }
    case DataType.BIGINTEGER:
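The switch breaks off at the BIGINTEGER case, but most primitive branches above follow the same pattern: if the target table already carries more specific type information (char/varchar length, decimal precision, DATE vs. TIMESTAMP), reuse that type info; otherwise fall back to a default. A sketch of just the CHARARRAY case, with an invented varchar(20) table column; the helper name is made up for illustration:

import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;

public class ChararrayMappingSketch {
  // Mirrors the CHARARRAY branch above: prefer the target table's type info when it is
  // known, otherwise fall back to plain string.
  static HCatFieldSchema mapChararray(String alias, HCatFieldSchema tableCol) throws HCatException {
    if (tableCol != null && tableCol.getTypeInfo() != null) {
      return new HCatFieldSchema(alias, tableCol.getTypeInfo(), null);
    }
    return new HCatFieldSchema(alias, TypeInfoFactory.stringTypeInfo, null);
  }

  public static void main(String[] args) throws HCatException {
    PrimitiveTypeInfo varchar20 = TypeInfoFactory.getVarcharTypeInfo(20);
    HCatFieldSchema tableCol = new HCatFieldSchema("name", varchar20, "column from the target table");

    System.out.println(mapChararray("name", tableCol).getTypeInfo().getTypeName()); // varchar(20)
    System.out.println(mapChararray("name", null).getTypeInfo().getTypeName());     // string
  }
}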

        return converted;

      case ARRAY:
        // Unwrap the bag.
        DataBag pigBag = (DataBag) pigObj;
        HCatFieldSchema tupFS = hcatFS.getArrayElementSchema().get(0);
        boolean needTuple = tupFS.getType() == Type.STRUCT;
        List<Object> bagContents = new ArrayList<Object>((int) pigBag.size());
        Iterator<Tuple> bagItr = pigBag.iterator();

        while (bagItr.hasNext()) {
          // If there is only one element in the tuple contained in the bag, we throw away the tuple.
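Whether each bag element keeps its wrapping tuple is decided from the array's element schema: only STRUCT elements map to multi-field tuples. The sketch below builds an array<string> and an array<struct<x:int,y:int>> field (all names invented) and applies the same check:

import java.util.ArrayList;
import java.util.Arrays;

import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;

public class ArrayElementSketch {
  public static void main(String[] args) throws HCatException {
    // array<string>: the single element field is a primitive.
    HCatFieldSchema tags = new HCatFieldSchema("tags", HCatFieldSchema.Type.ARRAY,
        new HCatSchema(new ArrayList<HCatFieldSchema>(Arrays.asList(
            new HCatFieldSchema(null, HCatFieldSchema.Type.STRING, null)))), null);

    // array<struct<x:int,y:int>>: the single element field is a STRUCT.
    HCatSchema point = new HCatSchema(new ArrayList<HCatFieldSchema>(Arrays.asList(
        new HCatFieldSchema("x", HCatFieldSchema.Type.INT, null),
        new HCatFieldSchema("y", HCatFieldSchema.Type.INT, null))));
    HCatFieldSchema points = new HCatFieldSchema("points", HCatFieldSchema.Type.ARRAY,
        new HCatSchema(new ArrayList<HCatFieldSchema>(Arrays.asList(
            new HCatFieldSchema(null, HCatFieldSchema.Type.STRUCT, point, null)))), null);

    // The same decision as in the fragment above: only STRUCT elements need the tuple.
    for (HCatFieldSchema fs : Arrays.asList(tags, points)) {
      HCatFieldSchema element = fs.getArrayElementSchema().get(0);
      boolean needTuple = element.getType() == HCatFieldSchema.Type.STRUCT;
      System.out.println(fs.getName() + ": element type = " + element.getType()
          + ", needs tuple = " + needTuple);
    }
  }
}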

    // Iterate through all the elements in the Pig schema and do validations as
    // dictated by semantics, consulting the table's HCatSchema when need be.
    int columnPos = 0; // helps with debug messages
    for (FieldSchema pigField : pigSchema.getFields()) {
      HCatFieldSchema hcatField = getColFromSchema(pigField.alias, tblSchema);
      validateSchema(pigField, hcatField, pigSchema, tblSchema, columnPos++);
    }

    try {
      PigHCatUtil.validateHCatTableSchemaFollowsPigRules(tblSchema);
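The validation pass above walks the writer-side columns and checks each one against the table's HCatSchema. A simplified, self-contained sketch of such a per-column type check (the column names and the deliberate STRING/INT mismatch are invented, and this is not the actual validateSchema() logic):

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;

public class ColumnTypeCheckSketch {
  public static void main(String[] args) throws HCatException {
    HCatSchema tblSchema = new HCatSchema(new ArrayList<HCatFieldSchema>());
    tblSchema.append(new HCatFieldSchema("name", HCatFieldSchema.Type.STRING, ""));
    tblSchema.append(new HCatFieldSchema("age", HCatFieldSchema.Type.INT, ""));

    // Writer-side columns: "age" deliberately arrives as STRING to trigger a mismatch.
    Map<String, HCatFieldSchema.Type> writerCols = new LinkedHashMap<String, HCatFieldSchema.Type>();
    writerCols.put("name", HCatFieldSchema.Type.STRING);
    writerCols.put("age", HCatFieldSchema.Type.STRING);

    int columnPos = 0; // helps with debug messages, as in the fragment above
    for (Map.Entry<String, HCatFieldSchema.Type> col : writerCols.entrySet()) {
      HCatFieldSchema tblCol = tblSchema.get(col.getKey());
      if (tblCol.getType() != col.getValue()) {
        System.out.println("column " + columnPos + " ('" + col.getKey() + "'): writer type "
            + col.getValue() + " does not match table type " + tblCol.getType());
      }
      columnPos++;
    }
  }
}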

    ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1];
    bagSubFieldSchemas[0] = new ResourceFieldSchema().setName(innerTupleName)
      .setDescription("The tuple in the bag")
      .setType(DataType.TUPLE);
    HCatFieldSchema arrayElementFieldSchema = hfs.getArrayElementSchema().get(0);
    if (arrayElementFieldSchema.getType() == Type.STRUCT) {
      bagSubFieldSchemas[0].setSchema(getTupleSubSchema(arrayElementFieldSchema));
    } else if (arrayElementFieldSchema.getType() == Type.ARRAY) {
      ResourceSchema s = new ResourceSchema();
      List<ResourceFieldSchema> lrfs = Arrays.asList(getResourceSchemaFromFieldSchema(arrayElementFieldSchema));
      s.setFields(lrfs.toArray(new ResourceFieldSchema[lrfs.size()]));
      bagSubFieldSchemas[0].setSchema(s);
    } else {
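The fragment above (which breaks off inside the branch for primitive array elements) builds the Pig-side ResourceSchema for an HCatalog array column: a bag whose single field is a tuple, whose own schema comes from the array's element schema. The sketch below assembles the same shape by hand for an array<string> column; the llama/innerLlama names are borrowed from the test further down, "innertuple" is invented, and this is an illustration rather than the PigHCatUtil implementation:

import org.apache.pig.ResourceSchema;
import org.apache.pig.ResourceSchema.ResourceFieldSchema;
import org.apache.pig.data.DataType;

public class BagSchemaSketch {
  public static void main(String[] args) throws Exception {
    // Pig-side shape of an HCatalog array<string> column: a bag containing a tuple
    // with a single chararray field.
    ResourceFieldSchema stringField = new ResourceFieldSchema()
        .setName("innerLlama").setType(DataType.CHARARRAY);

    ResourceFieldSchema tupleField = new ResourceFieldSchema()
        .setName("innertuple").setDescription("The tuple in the bag").setType(DataType.TUPLE);
    tupleField.setSchema(new ResourceSchema().setFields(new ResourceFieldSchema[]{stringField}));

    ResourceFieldSchema bagField = new ResourceFieldSchema()
        .setName("llama").setType(DataType.BAG);
    bagField.setSchema(new ResourceSchema().setFields(new ResourceFieldSchema[]{tupleField}));

    System.out.println(bagField);
  }
}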

  private static DataBag transformToBag(List<?> list, HCatFieldSchema hfs) throws Exception {
    if (list == null) {
      return null;
    }

    HCatFieldSchema elementSubFieldSchema = hfs.getArrayElementSchema().getFields().get(0);
    DataBag db = new DefaultDataBag();
    for (Object o : list) {
      Tuple tuple;
      if (elementSubFieldSchema.getType() == Type.STRUCT) {
        tuple = transformToTuple((List<?>) o, elementSubFieldSchema);
      } else {
        // bags always contain tuples
        tuple = tupFac.newTuple(extractPigObject(o, elementSubFieldSchema));
      }
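Going the other way, an HCatalog array value arrives as a java.util.List and has to be rewrapped as a bag of tuples for Pig. A sketch of that conversion for a plain array<string> element schema (the STRUCT branch handled above is omitted, and the values are invented):

import java.util.Arrays;
import java.util.List;

import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DefaultDataBag;
import org.apache.pig.data.TupleFactory;

public class ListToBagSketch {
  public static void main(String[] args) throws HCatException {
    // Element schema of an array<string> column (element fields are typically unnamed).
    HCatFieldSchema elementSchema = new HCatFieldSchema(null, HCatFieldSchema.Type.STRING, null);

    // The array value as HCatalog hands it over.
    List<String> values = Arrays.asList("alpha", "beta", "gamma");

    // Bags always contain tuples, so each non-STRUCT element is wrapped in a one-field tuple.
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag bag = new DefaultDataBag();
    if (elementSchema.getType() != HCatFieldSchema.Type.STRUCT) {
      for (String value : values) {
        bag.add(tupleFactory.newTuple(value));
      }
    }
    System.out.println(bag); // prints something like {(alpha),(beta),(gamma)}
  }
}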

    bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas));
    ResourceSchema expected = new ResourceSchema().setFields(bagSubFieldSchemas);

    // Get the actual converted schema.
    HCatSchema hCatSchema = new HCatSchema(Lists.newArrayList(
      new HCatFieldSchema("innerLlama", HCatFieldSchema.Type.STRING, null)));
    HCatFieldSchema hCatFieldSchema =
      new HCatFieldSchema("llama", HCatFieldSchema.Type.ARRAY, hCatSchema, null);
    ResourceSchema actual = PigHCatUtil.getBagSubSchema(hCatFieldSchema);

    Assert.assertEquals(expected.toString(), actual.toString());
  }

