for(LogicalExpression op: list) {
if (!DataType.isUsableType(op.getType())) {
int errCode = 1014;
String msg = "Problem with input " + op + " of User-defined function: " + func;
msgCollector.collect(msg, MessageType.Error);
throw new TypeCheckerException(func, msg, errCode, PigException.INPUT) ;
}
try {
currentArgSchema.add(Util.translateFieldSchema(op.getFieldSchema()));
} catch (FrontendException e) {
int errCode = 1043;
String msg = "Unable to retrieve field schema.";
throw new TypeCheckerException(func, msg, errCode, PigException.INPUT, e);
}
}
EvalFunc<?> ef = (EvalFunc<?>) PigContext.instantiateFuncFromSpec(func.getFuncSpec());
// ask the EvalFunc what types of inputs it can handle
List<FuncSpec> funcSpecs = null;
try {
funcSpecs = ef.getArgToFuncMapping();
if (funcSpecs!=null) {
for (FuncSpec funcSpec : funcSpecs) {
Schema s = funcSpec.getInputArgsSchema();
LogicalSchema ls = Util.translateSchema(s);
ls.normalize();
funcSpec.setInputArgsSchema(Util.translateSchema(ls));
}
}
} catch (Exception e) {
int errCode = 1044;
String msg = "Unable to get list of overloaded methods.";
throw new TypeCheckerException(func, msg, errCode, PigException.INPUT, e);
}
/**
* Here is an explanation of the way the matching UDF funcspec will be chosen
* based on actual types in the input schema.
* First an "exact" match is tried for each of the fields in the input schema
* with the corresponding fields in the candidate funcspecs' schemas.
*
* If the exact match fails, a check is first made to see whether the input schema has any
* bytearrays in it.
*
* If there are NO bytearrays in the input schema, then a best fit match is attempted
* for the different fields. Essentially, a permissible cast from one type to another
* is given a "score" based on its position in the "castLookup" table. A final
* score for a candidate funcspec is deduced as
* SUM(score_of_particular_cast*noOfCastsSoFar).
* If no permissible casts are possible, the score for the candidate is -1. Among
* the non -1 score candidates, the candidate with the lowest score is chosen.
*
* If there are bytearrays in the input schema, a modified exact match is tried. In this
* matching, bytearrays in the input schema are not considered. As a result of
* ignoring the bytearrays, we could get multiple candidate funcspecs which match
* "exactly" for the other columns - if this is the case, we notify the user of
* the ambiguity and error out. Else if all other (non byte array) fields
* matched exactly, then we can cast bytearray(s) to the corresponding type(s)
* in the matched udf schema. If this modified exact match fails, the above best fit
* algorithm is attempted by initially coming up with scores and candidate funcSpecs
* (with bytearray(s) being ignored in the scoring process). Then a check is
* made to ensure that the positions which have bytearrays in the input schema
* have the same type (for a given position) in the corresponding positions in
* all the candidate funcSpecs. If this is not the case, it indicates a conflict
* and the user is notified of the error (because we have more than
* one choice for the destination type of the cast for the bytearray). If the types do agree,
* the candidate with the lowest score is chosen.
*/
FuncSpec matchingSpec = null;
boolean notExactMatch = false;
if(funcSpecs!=null && funcSpecs.size()!=0){
//Some function mappings found. Trying to see
//if one of them fits the input schema
if((matchingSpec = exactMatch(funcSpecs, currentArgSchema, func))==null){
//Oops, no exact match found. Trying to see if we
//have mappings that we can fit using casts.
notExactMatch = true;
if(byteArrayFound(func, currentArgSchema)){
// try "exact" matching all other fields except the byte array
// fields and if they all exact match and we have only one candidate
// for the byte array cast then that's the matching one!
if((matchingSpec = exactMatchWithByteArrays(funcSpecs, currentArgSchema, func))==null){
// "exact" match with byte arrays did not work - try best fit match
if((matchingSpec = bestFitMatchWithByteArrays(funcSpecs, currentArgSchema, func)) == null) {
int errCode = 1045;
String msg = "Could not infer the matching function for "
+ func.getFuncSpec()
+ " as multiple or none of them fit. Please use an explicit cast.";
msgCollector.collect(msg, MessageType.Error);
throw new TypeCheckerException(func, msg, errCode, PigException.INPUT);
}
}
} else if ((matchingSpec = bestFitMatch(funcSpecs, currentArgSchema)) == null) {
// Either no byte arrays found or there are byte arrays
// but only one mapping exists.
// However, we could not find a match as there were either
// none fitting the input schema or it was ambiguous.
// Throw exception that we can't infer a fit.
int errCode = 1045;
String msg = "Could not infer the matching function for "
+ func.getFuncSpec()
+ " as multiple or none of them fit. Please use an explicit cast.";
msgCollector.collect(msg, MessageType.Error);
throw new TypeCheckerException(func, msg, errCode, PigException.INPUT);
}
}
}
if(matchingSpec!=null){
//Voila! We have a fitting match. Lets insert casts and make