Package org.apache.hcatalog.data.schema

Examples of org.apache.hcatalog.data.schema.HCatFieldSchema
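HCatFieldSchema describes a single column in an HCatalog schema: a field name, a Type, an optional sub-schema for complex types, and a comment. As a minimal, self-contained sketch of the three constructor forms that recur in the excerpts below (the column names here are invented for illustration):

import java.util.Arrays;

import org.apache.hcatalog.common.HCatException;
import org.apache.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hcatalog.data.schema.HCatFieldSchema.Type;
import org.apache.hcatalog.data.schema.HCatSchema;

public class HCatFieldSchemaExamples {
  public static void main(String[] args) throws HCatException {
    // Primitive column: name, type, comment.
    HCatFieldSchema id = new HCatFieldSchema("id", Type.INT, "id comment");

    // Array column: the element type is described by a one-field sub-schema.
    HCatFieldSchema element = new HCatFieldSchema("elem", Type.STRING, null);
    HCatFieldSchema tags = new HCatFieldSchema("tags", Type.ARRAY,
        new HCatSchema(Arrays.asList(element)), "an array<string> column");

    // Map column: a key type plus a one-field sub-schema for the value type.
    HCatFieldSchema val = new HCatFieldSchema("val", Type.STRING, null);
    HCatFieldSchema props = new HCatFieldSchema("props", Type.MAP, Type.STRING,
        new HCatSchema(Arrays.asList(val)), "a map<string,string> column");

    HCatSchema schema = new HCatSchema(Arrays.asList(id, tags, props));
    System.out.println(schema.getFieldNames());
  }
}

The first excerpt, from HCatBaseStorer, builds such a schema from a Pig schema: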


  /**
   * Constructs an HCatSchema from the given Pig schema; tableSchema is the
   * existing schema of the target table in the metastore.
   */
  protected HCatSchema convertPigSchemaToHCatSchema(Schema pigSchema, HCatSchema tableSchema) throws FrontendException {
    List<HCatFieldSchema> fieldSchemas = new ArrayList<HCatFieldSchema>(pigSchema.size());
    for (FieldSchema fSchema : pigSchema.getFields()) {
      try {
        HCatFieldSchema hcatFieldSchema = getColFromSchema(fSchema.alias, tableSchema);
        fieldSchemas.add(getHCatFSFromPigFS(fSchema, hcatFieldSchema));
      } catch (HCatException he) {
        throw new FrontendException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he);
      }
    }
    return new HCatSchema(fieldSchemas);
  }
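Each field is converted by switching on Pig's DataType code. In the helper below, hcatFieldSchema is the matching column from the target table, or null when the Pig field is a brand-new column: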


  private HCatFieldSchema getHCatFSFromPigFS(FieldSchema fSchema, HCatFieldSchema hcatFieldSchema)
      throws FrontendException, HCatException {
    byte type = fSchema.type;
    switch (type) {

    case DataType.CHARARRAY:
    case DataType.BIGCHARARRAY:
      return new HCatFieldSchema(fSchema.alias, Type.STRING, null);

    case DataType.INTEGER:
      return new HCatFieldSchema(fSchema.alias, Type.INT, null);

    case DataType.LONG:
      return new HCatFieldSchema(fSchema.alias, Type.BIGINT, null);

    case DataType.FLOAT:
      return new HCatFieldSchema(fSchema.alias, Type.FLOAT, null);

    case DataType.DOUBLE:
      return new HCatFieldSchema(fSchema.alias, Type.DOUBLE, null);

    case DataType.BYTEARRAY:
      return new HCatFieldSchema(fSchema.alias, Type.BINARY, null);

    case DataType.BAG:
      Schema bagSchema = fSchema.schema;
      List<HCatFieldSchema> arrFields = new ArrayList<HCatFieldSchema>(1);
      FieldSchema field;
      // Find out if we need to throw away the tuple or not.
      if (removeTupleFromBag(hcatFieldSchema, fSchema)) {
        field = bagSchema.getField(0).schema.getField(0);
      } else {
        field = bagSchema.getField(0);
      }
      arrFields.add(getHCatFSFromPigFS(field, hcatFieldSchema == null ? null : hcatFieldSchema.getArrayElementSchema().get(0)));
      return new HCatFieldSchema(fSchema.alias, Type.ARRAY, new HCatSchema(arrFields), "");

    case DataType.TUPLE:
      List<String> fieldNames = new ArrayList<String>();
      List<HCatFieldSchema> hcatFSs = new ArrayList<HCatFieldSchema>();
      HCatSchema structSubSchema = hcatFieldSchema == null ? null : hcatFieldSchema.getStructSubSchema();
      List<FieldSchema> fields = fSchema.schema.getFields();
      for (int i = 0; i < fields.size(); i++) {
        FieldSchema fieldSchema = fields.get(i);
        fieldNames.add(fieldSchema.alias);
        hcatFSs.add(getHCatFSFromPigFS(fieldSchema, structSubSchema == null ? null : structSubSchema.get(i)));
      }
      return new HCatFieldSchema(fSchema.alias, Type.STRUCT, new HCatSchema(hcatFSs), "");

    case DataType.MAP: {
      // Pig's schema contains no type information about a map's keys and
      // values. So, if it's a new column, assume map<string,string>; if it
      // already exists, return whatever the existing column contains.

      HCatFieldSchema valFS;
      List<HCatFieldSchema> valFSList = new ArrayList<HCatFieldSchema>(1);

      if(hcatFieldSchema != null){
        return new HCatFieldSchema(fSchema.alias, Type.MAP, Type.STRING, hcatFieldSchema.getMapValueSchema(), "");
      }

      // Column not found in the target table, so it's a new column. Its schema is map<string,string>.
      valFS = new HCatFieldSchema(fSchema.alias, Type.STRING, "");
      valFSList.add(valFS);
      return new HCatFieldSchema(fSchema.alias, Type.MAP, Type.STRING, new HCatSchema(valFSList), "");
     }

    default:
      throw new FrontendException("Unsupported type: "+type+"  in Pig's schema", PigHCatUtil.PIG_EXCEPTION_CODE);
    }
  }
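Note that MAP columns always get string keys: Pig map keys are chararrays, so only the value schema can come from an existing table column. At write time the conversion runs in the other direction, unwrapping Pig values into plain Java objects. The next fragment is the bag (ARRAY) case of the storer's recursive converter, getJavaObj: a DataBag becomes a Java List, and single-element tuples are thrown away unless the array element is a struct.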

        if (pigObj == null) {
          return null;
        }
        // Unwrap the bag.
        DataBag pigBag = (DataBag)pigObj;
        HCatFieldSchema tupFS = hcatFS.getArrayElementSchema().get(0);
        boolean needTuple = tupFS.getType() == Type.STRUCT;
        List<Object> bagContents = new ArrayList<Object>((int)pigBag.size());
        Iterator<Tuple> bagItr = pigBag.iterator();

        while (bagItr.hasNext()) {
          // If there is only one element in the tuple contained in the bag, we throw away the tuple.
          bagContents.add(getJavaObj(needTuple ? bagItr.next() : bagItr.next().get(0), tupFS));
        }
        return bagContents;
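Before writing any data, the storer also validates the Pig schema field by field against the table schema: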

    // Iterate through all the elements in the Pig schema and do validations as
    // dictated by the semantics, consulting the table's HCatSchema where needed.

    for(FieldSchema pigField : pigSchema.getFields()){
      HCatFieldSchema hcatField = getColFromSchema(pigField.alias, tblSchema);

      validateSchema(pigField, hcatField);
    }

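On the load side, an HCatalog array column must be presented to Pig as a bag of tuples. This PigHCatUtil helper builds the bag's inner ResourceSchema from the array's element schema: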

  protected static ResourceSchema getBagSubSchema(HCatFieldSchema hfs) throws IOException {
    // Bags always contain tuples, so we simulate the tuple by putting the
    // single field in a tuple.
    ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1];
    bagSubFieldSchemas[0] = new ResourceFieldSchema().setName("innertuple")
      .setDescription("The tuple in the bag")
      .setType(DataType.TUPLE);
    HCatFieldSchema arrayElementFieldSchema = hfs.getArrayElementSchema().get(0);
    if(arrayElementFieldSchema.getType() == Type.STRUCT) {
      bagSubFieldSchemas[0].setSchema(getTupleSubSchema(arrayElementFieldSchema));
    } else if(arrayElementFieldSchema.getType() == Type.ARRAY) {
      ResourceSchema s = new ResourceSchema();
      List<ResourceFieldSchema> lrfs = Arrays.asList(getResourceSchemaFromFieldSchema(arrayElementFieldSchema));
      s.setFields(lrfs.toArray(new ResourceFieldSchema[0]));
      bagSubFieldSchemas[0].setSchema(s);
    } else {
      // A primitive element type: the inner tuple carries a single field of that type.
      ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1];
      innerTupleFieldSchemas[0] = new ResourceFieldSchema().setName("innerfield")
        .setDescription("The inner field in the tuple in the bag")
        .setType(getPigType(arrayElementFieldSchema));
      ResourceSchema innerTupleSchema = new ResourceSchema();
      innerTupleSchema.setFields(innerTupleFieldSchemas);
      bagSubFieldSchemas[0].setSchema(innerTupleSchema);
    }
    ResourceSchema bagSchema = new ResourceSchema();
    bagSchema.setFields(bagSubFieldSchemas);
    return bagSchema;
  }
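The matching data conversion, transformToBag, turns a Java List read from HCatalog into a Pig DataBag, wrapping each element in a tuple unless the elements are structs, which already map to tuples: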

  public static DataBag transformToBag(List<? extends Object> list, HCatFieldSchema hfs) throws Exception {
    if (list == null){
      return null;
    }

    HCatFieldSchema elementSubFieldSchema = hfs.getArrayElementSchema().getFields().get(0);
    DataBag db = new DefaultDataBag();
    for (Object o : list){
      Tuple tuple;
      if (elementSubFieldSchema.getType() == Type.STRUCT){
        tuple = transformToTuple((List<Object>)o, elementSubFieldSchema);
      } else {
        // bags always contain tuples
        tuple = tupFac.newTuple(extractPigObject(o, elementSubFieldSchema));
      }
      db.add(tuple);
    }
    return db;
  }
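A hedged usage sketch of transformToBag, assuming it is the public static helper in org.apache.hcatalog.pig.PigHCatUtil shown above; the tags field mirrors the array<string> column from the first example:

import java.util.Arrays;

import org.apache.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hcatalog.data.schema.HCatFieldSchema.Type;
import org.apache.hcatalog.data.schema.HCatSchema;
import org.apache.hcatalog.pig.PigHCatUtil;  // assumed home of transformToBag
import org.apache.pig.data.DataBag;

public class TransformToBagDemo {
  public static void main(String[] args) throws Exception {
    HCatFieldSchema element = new HCatFieldSchema("elem", Type.STRING, null);
    HCatFieldSchema tags = new HCatFieldSchema("tags", Type.ARRAY,
        new HCatSchema(Arrays.asList(element)), "");
    // Each list element is wrapped in a single-field tuple: {(a),(b)}
    DataBag bag = PigHCatUtil.transformToBag(Arrays.asList("a", "b"), tags);
    System.out.println(bag);
  }
}

The next excerpt is from a storer writing to a non-partitioned Pig table: smallint and tinyint columns are widened to int, since Pig has no integer type narrower than int.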

    if (writeToNonPartPigTable) {
      List<HCatFieldSchema> newHfsList = new ArrayList<HCatFieldSchema>();
      // Change smallint and tinyint columns to int.
      for (HCatFieldSchema hfs : s.getFields()) {
        if (hfs.getTypeString().equals("smallint")
            || hfs.getTypeString().equals("tinyint")) {
          newHfsList.add(new HCatFieldSchema(hfs.getName(),
              HCatFieldSchema.Type.INT, hfs.getComment()));
        } else {
          newHfsList.add(hfs);
        }
      }
      s = new HCatSchema(newHfsList);  // replace the schema with the widened columns
    }
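The remaining examples exercise HCatFieldSchema through the HCatClient API, where a List<HCatFieldSchema> supplies the column definitions for DDL calls. First, creating an RCFile-backed table and verifying its location and input format: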

      .getProperty("test.warehouse.dir", "/user/hive/warehouse");
    String expectedDir = org.apache.hive.hcatalog.api.TestHCatClient.fixPath(warehouseDir).
            replaceFirst("pfile:///", "pfile:/");
    assertEquals(expectedDir + "/" + db + ".db", testDb.getLocation());
    ArrayList<HCatFieldSchema> cols = new ArrayList<HCatFieldSchema>();
    cols.add(new HCatFieldSchema("id", Type.INT, "id comment"));
    cols.add(new HCatFieldSchema("value", Type.STRING, "value comment"));
    HCatCreateTableDesc tableDesc = HCatCreateTableDesc
      .create(db, tableOne, cols).fileFormat("rcfile").build();
    client.createTable(tableDesc);
    HCatTable table1 = client.getTable(db, tableOne);
    assertTrue(table1.getInputFileFormat().equalsIgnoreCase(
      RCFileInputFormat.class.getName()));
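Partition columns are declared with the same class and passed to partCols() as a separate list: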

    HCatCreateDBDesc dbDesc = HCatCreateDBDesc.create(dbName)
      .ifNotExists(true).build();
    client.createDatabase(dbDesc);
    ArrayList<HCatFieldSchema> cols = new ArrayList<HCatFieldSchema>();
    cols.add(new HCatFieldSchema("userid", Type.INT, "id columns"));
    cols.add(new HCatFieldSchema("viewtime", Type.BIGINT,
      "view time columns"));
    cols.add(new HCatFieldSchema("pageurl", Type.STRING, ""));
    cols.add(new HCatFieldSchema("ip", Type.STRING,
      "IP Address of the User"));

    ArrayList<HCatFieldSchema> ptnCols = new ArrayList<HCatFieldSchema>();
    ptnCols.add(new HCatFieldSchema("dt", Type.STRING, "date column"));
    ptnCols.add(new HCatFieldSchema("country", Type.STRING,
      "country column"));
    HCatCreateTableDesc tableDesc = HCatCreateTableDesc
      .create(dbName, tableName, cols).fileFormat("sequencefile")
      .partCols(ptnCols).build();
    client.createTable(tableDesc);
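This is the programmatic equivalent of CREATE TABLE dbname.tablename (userid INT, viewtime BIGINT, pageurl STRING, ip STRING) PARTITIONED BY (dt STRING, country STRING) STORED AS SEQUENCEFILE. Finally, createTableLike clones an existing table's column schema, HCatFieldSchemas included, into a new table: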

    String cloneTable = "tabletwo";
    client.dropTable(null, tableName, true);
    client.dropTable(null, cloneTable, true);

    ArrayList<HCatFieldSchema> cols = new ArrayList<HCatFieldSchema>();
    cols.add(new HCatFieldSchema("id", Type.INT, "id columns"));
    cols.add(new HCatFieldSchema("value", Type.STRING, "id columns"));
    HCatCreateTableDesc tableDesc = HCatCreateTableDesc
      .create(null, tableName, cols).fileFormat("rcfile").build();
    client.createTable(tableDesc);
    // create a new table similar to previous one.
    client.createTableLike(null, tableName, cloneTable, true, false, null);