/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.test;
import java.util.*;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import junit.framework.Assert;
import org.apache.pig.ExecType;
import org.apache.pig.FuncSpec;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.builtin.PigStorage;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.DefaultBagFactory;
import org.apache.pig.data.DefaultTuple;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.io.FileSpec;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.PigServer;
import org.apache.pig.impl.io.FileLocalizer;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.POStatus;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad;
import org.apache.pig.test.utils.GenPhyOp;
import org.apache.pig.test.utils.TestHelper;
import org.apache.pig.impl.logicalLayer.LOLoad;
import org.apache.pig.impl.logicalLayer.LogicalOperator;
import org.apache.pig.impl.logicalLayer.LogicalPlan;
import org.apache.pig.impl.logicalLayer.LogicalPlanBuilder;
import org.apache.pig.backend.datastorage.ContainerDescriptor;
import org.apache.pig.backend.datastorage.DataStorage;
import org.apache.pig.backend.datastorage.DataStorageException;
import org.apache.pig.backend.datastorage.ElementDescriptor;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
public class TestLoad extends junit.framework.TestCase {
FileSpec inpFSpec;
POLoad ld;
PigContext pc;
DataBag inpDB;
String curDir;
String inpDir;
PigServer pig;
static MiniCluster cluster = MiniCluster.buildCluster();
@Before
public void setUp() throws Exception {
curDir = System.getProperty("user.dir");
inpDir = curDir + File.separatorChar + "test/org/apache/pig/test/data/InputFiles/";
if ((System.getProperty("os.name").toUpperCase().startsWith("WINDOWS")))
inpDir="/"+FileLocalizer.parseCygPath(inpDir, FileLocalizer.STYLE_WINDOWS);
inpFSpec = new FileSpec("file:" + inpDir + "passwd", new FuncSpec(PigStorage.class.getName(), new String[]{":"}));
FileLocalizer.deleteTempFiles();
pig = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
pc = pig.getPigContext();
ld = GenPhyOp.topLoadOp();
ld.setLFile(inpFSpec);
ld.setPc(pc);
inpDB = DefaultBagFactory.getInstance().newDefaultBag();
BufferedReader br = new BufferedReader(new FileReader("test/org/apache/pig/test/data/InputFiles/passwd"));
for(String line = br.readLine();line!=null;line=br.readLine()){
String[] flds = line.split(":",-1);
Tuple t = new DefaultTuple();
for (String fld : flds) {
t.append((fld.compareTo("")!=0 ? new DataByteArray(fld.getBytes()) : null));
}
inpDB.add(t);
}
}
@After
public void tearDown() throws Exception {
}
@Test
public void testGetNextTuple() throws ExecException {
Tuple t=null;
int size = 0;
for(Result res = ld.getNext(t);res.returnStatus!=POStatus.STATUS_EOP;res=ld.getNext(t)){
assertEquals(true, TestHelper.bagContains(inpDB, (Tuple)res.result));
++size;
}
assertEquals(true, size==inpDB.size());
}
@Test
public void testLoadLocalRel() throws Exception {
checkLoadPath("file:test/org/apache/pig/test/data/passwd", "", true);
}
@Test
public void testLoadLocalAbs() throws Exception {
String filename = curDir + File.separatorChar+"test/org/apache/pig/test/data/passwd";
if ((System.getProperty("os.name").toUpperCase().startsWith("WINDOWS")))
{
filename="/"+FileLocalizer.parseCygPath(filename, FileLocalizer.STYLE_WINDOWS);
filename=Util.encodeEscape(filename);
}
checkLoadPath("file:"+filename, "", true);
}
@Test
public void testLoadRemoteRel() throws Exception {
checkLoadPath("test","/tmp/test");
}
@Test
public void testLoadRemoteAbs() throws Exception {
checkLoadPath("/tmp/test","/tmp/test");
}
@Test
public void testLoadRemoteRelScheme() throws Exception {
checkLoadPath("test","/tmp/test");
}
@Test
public void testLoadRemoteAbsScheme() throws Exception {
checkLoadPath("hdfs:/tmp/test","/tmp/test");
}
@Test
public void testLoadRemoteAbsAuth() throws Exception {
checkLoadPath("hdfs://localhost:9000/test","/test");
}
@Test
public void testLoadRemoteNormalize() throws Exception {
checkLoadPath("/tmp/foo/../././","/tmp");
}
@Test
public void testGlobChars() throws Exception {
checkLoadPath("t?s*","/tmp/t?s*");
}
private void checkLoadPath(String orig, String expected) throws Exception {
checkLoadPath(orig, expected, false);
}
private void checkLoadPath(String orig, String expected, boolean isTmp) throws Exception {
pc.getProperties().setProperty("opt.multiquery",""+true);
DataStorage dfs = pc.getDfs();
dfs.setActiveContainer(dfs.asContainer("/tmp"));
Map<LogicalOperator, LogicalPlan> aliases = new HashMap<LogicalOperator, LogicalPlan>();
Map<OperatorKey, LogicalOperator> logicalOpTable = new HashMap<OperatorKey, LogicalOperator>();
Map<String, LogicalOperator> aliasOp = new HashMap<String, LogicalOperator>();
Map<String, String> fileNameMap = new HashMap<String, String>();
LogicalPlanBuilder builder = new LogicalPlanBuilder(pc);
String query = "a = load '"+orig+"';";
LogicalPlan lp = builder.parse("Test-Load",
query,
aliases,
logicalOpTable,
aliasOp,
fileNameMap);
Assert.assertTrue(lp.size()>0);
LogicalOperator op = lp.getRoots().get(0);
Assert.assertTrue(op instanceof LOLoad);
LOLoad load = (LOLoad)op;
String p = load.getInputFile().getFileName();
p = p.replaceAll("hdfs://[0-9a-zA-Z:\\.]*/","/");
if (isTmp) {
Assert.assertTrue(p.matches("/tmp.*"));
} else {
Assert.assertEquals(p, expected);
}
}
}