/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hive.hcatalog.pig;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import junit.framework.TestCase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.hive.cli.CliSessionState;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.CommandNeedRetryException;
import org.apache.hadoop.hive.ql.Driver;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hive.hcatalog.HcatTestUtils;
import org.apache.hive.hcatalog.common.HCatConstants;
import org.apache.hive.hcatalog.common.HCatContext;
import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat;
import org.apache.hive.hcatalog.mapreduce.OutputJobInfo;
import org.apache.hive.hcatalog.mapreduce.HCatMapRedUtil;
import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
import org.apache.pig.data.Tuple;
public class TestE2EScenarios extends TestCase {
private static final String TEST_DATA_DIR = System.getProperty("user.dir") +
"/build/test/data/" + TestHCatLoader.class.getCanonicalName();
private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse";
private static final String TEXTFILE_LOCN = TEST_DATA_DIR + "/textfile";
private static Driver driver;
protected String storageFormat() {
return "orc";
}
@Override
protected void setUp() throws Exception {
File f = new File(TEST_WAREHOUSE_DIR);
if (f.exists()) {
FileUtil.fullyDelete(f);
}
new File(TEST_WAREHOUSE_DIR).mkdirs();
HiveConf hiveConf = new HiveConf(this.getClass());
hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, "");
hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "");
hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false");
hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR);
driver = new Driver(hiveConf);
SessionState.start(new CliSessionState(hiveConf));
}
@Override
protected void tearDown() throws Exception {
dropTable("inpy");
dropTable("rc5318");
dropTable("orc5318");
}
private void dropTable(String tablename) throws IOException, CommandNeedRetryException {
driver.run("drop table " + tablename);
}
private void createTable(String tablename, String schema, String partitionedBy, String storageFormat) throws IOException, CommandNeedRetryException {
String createTable;
createTable = "create table " + tablename + "(" + schema + ") ";
if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) {
createTable = createTable + "partitioned by (" + partitionedBy + ") ";
}
if (storageFormat != null){
createTable = createTable + "stored as " +storageFormat;
}
driverRun(createTable);
}
private void driverRun(String cmd) throws IOException, CommandNeedRetryException {
int retCode = driver.run(cmd).getResponseCode();
if (retCode != 0) {
throw new IOException("Failed to run ["
+ cmd + "], return code from hive driver : [" + retCode + "]");
}
}
private void pigDump(String tableName) throws IOException {
PigServer server = new PigServer(ExecType.LOCAL);
System.err.println("===");
System.err.println(tableName+":");
server.registerQuery("X = load '" + tableName
+ "' using org.apache.hive.hcatalog.pig.HCatLoader();");
Iterator<Tuple> XIter = server.openIterator("X");
while (XIter.hasNext()) {
Tuple t = XIter.next();
for (Object o : t.getAll()){
System.err.print(
"\t(" + o.getClass().getName() + ":"
+ o.toString() + ")"
);
}
System.err.println("");
}
System.err.println("===");
}
private void copyTable(String in, String out) throws IOException, InterruptedException {
Job ijob = new Job();
Job ojob = new Job();
HCatInputFormat inpy = new HCatInputFormat();
inpy.setInput(ijob , null, in);
HCatOutputFormat oupy = new HCatOutputFormat();
oupy.setOutput(ojob,
OutputJobInfo.create(null, out, new HashMap<String,String>()
));
// Test HCatContext
System.err.println("HCatContext INSTANCE is present : " +HCatContext.INSTANCE.getConf().isPresent());
if (HCatContext.INSTANCE.getConf().isPresent()){
System.err.println("HCatContext tinyint->int promotion says " +
HCatContext.INSTANCE.getConf().get().getBoolean(
HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION,
HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT));
}
HCatSchema tableSchema = inpy.getTableSchema(ijob.getConfiguration());
System.err.println("Copying from ["+in+"] to ["+out+"] with schema : "+ tableSchema.toString());
oupy.setSchema(ojob, tableSchema);
oupy.checkOutputSpecs(ojob);
OutputCommitter oc = oupy.getOutputCommitter(createTaskAttemptContext(ojob.getConfiguration()));
oc.setupJob(ojob);
for (InputSplit split : inpy.getSplits(ijob)){
TaskAttemptContext rtaskContext = createTaskAttemptContext(ijob.getConfiguration());
TaskAttemptContext wtaskContext = createTaskAttemptContext(ojob.getConfiguration());
RecordReader<WritableComparable, HCatRecord> rr = inpy.createRecordReader(split, rtaskContext);
rr.initialize(split, rtaskContext);
OutputCommitter taskOc = oupy.getOutputCommitter(wtaskContext);
taskOc.setupTask(wtaskContext);
RecordWriter<WritableComparable<?>, HCatRecord> rw = oupy.getRecordWriter(wtaskContext);
while(rr.nextKeyValue()){
rw.write(rr.getCurrentKey(), rr.getCurrentValue());
}
rw.close(wtaskContext);
taskOc.commitTask(wtaskContext);
rr.close();
}
oc.commitJob(ojob);
}
private TaskAttemptContext createTaskAttemptContext(Configuration tconf) {
Configuration conf = (tconf == null) ? (new Configuration()) : tconf;
TaskAttemptID taskId = new TaskAttemptID();
conf.setInt("mapred.task.partition", taskId.getId());
conf.set("mapred.task.id", "attempt__0000_r_000000_" + taskId.getId());
TaskAttemptContext rtaskContext = HCatMapRedUtil.createTaskAttemptContext(conf , taskId);
return rtaskContext;
}
public void testReadOrcAndRCFromPig() throws Exception {
String tableSchema = "ti tinyint, si smallint,i int, bi bigint, f float, d double, b boolean";
HcatTestUtils.createTestDataFile(TEXTFILE_LOCN,
new String[]{
"-3\0019001\00186400\0014294967297\00134.532\0012184239842983489.1231231234\001true"
,"0\0010\0010\0010\0010\0010\001false"
}
);
// write this out to a file, and import it into hive
createTable("inpy",tableSchema,null,"textfile");
createTable("rc5318",tableSchema,null,"rcfile");
createTable("orc5318",tableSchema,null,"orc");
driverRun("LOAD DATA LOCAL INPATH '"+TEXTFILE_LOCN+"' OVERWRITE INTO TABLE inpy");
// write it out from hive to an rcfile table, and to an orc table
// driverRun("insert overwrite table rc5318 select * from inpy");
copyTable("inpy","rc5318");
// driverRun("insert overwrite table orc5318 select * from inpy");
copyTable("inpy","orc5318");
pigDump("inpy");
pigDump("rc5318");
pigDump("orc5318");
}
}