Package org.apache.pig.scripting.jruby

Source Code of org.apache.pig.scripting.jruby.RubySchema

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.pig.scripting.jruby;

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.Arrays;

import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.util.Utils;
import org.apache.pig.data.DataType;
import org.apache.pig.parser.ParserException;
import org.apache.pig.impl.logicalLayer.FrontendException;

import org.jruby.Ruby;
import org.jruby.RubyHash;
import org.jruby.RubyArray;
import org.jruby.RubyClass;
import org.jruby.RubyFixnum;
import org.jruby.RubyModule;
import org.jruby.RubyObject;
import org.jruby.RubyRange;
import org.jruby.RubyString;
import org.jruby.RubySymbol;
import org.jruby.anno.JRubyClass;
import org.jruby.anno.JRubyMethod;
import org.jruby.runtime.ObjectAllocator;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.Block;
import org.jruby.runtime.builtin.IRubyObject;

//TODO implement all of the merge functions

/**
* This class encapsulates a native Schema object, and provides a more convenient
* interface for manipulating Schemas. It hides the Schema/FieldSchema distinction
* from the user, and tries to present a cleaner, more Ruby-esque API to the user.
* For general information on JRuby's API definition annotations,
* see {@link RubyDataBag}.
*/
@JRubyClass(name = "Schema")
public class RubySchema extends RubyObject {

    private static final long serialVersionUID = 1L;

    /**
     * This is a pattern used in the conversion from ruby arguments to a valid Schema. It detects
     * cases where there is a bag, map, or tuple without being followed by {}, [], or () respectively.
     * It is used for convenience.
     * <p>
     * The match is case insensitive and is made of: an optional run of non-whitespace characters
     * ending in a colon (for example the <code>name:</code> part of <code>name:tuple</code>), the
     * type keyword itself (capture group 1), optional whitespace, and then either a comma or the
     * end of the input.
     */
    private static final Pattern bmtPattern = Pattern.compile("(?:\\S+:)?(bag|map|tuple)\\s*(?:,|$)", Pattern.CASE_INSENSITIVE);

    /**
     * This is the encapsulated Schema object.
     */
    private Schema internalSchema;

    /**
     * The allocator passed to the runtime when the "Schema" class is defined. It
     * creates a RubySchema through the (runtime, class) constructor, which wraps a
     * new, empty Schema.
     */
    private static final ObjectAllocator ALLOCATOR = new ObjectAllocator() {
        public IRubyObject allocate(Ruby runtime, RubyClass klass) {
            return new RubySchema(runtime, klass);
        }
    };

    /**
     * This method registers the class with the given runtime.
     *
     * @param runtime an instance of the Ruby runtime
     * @return        a RubyClass object with metadata about the registered class
     */
    public static RubyClass define(Ruby runtime) {
        RubyClass result = runtime.defineClass("Schema",runtime.getObject(), ALLOCATOR);

        result.kindOf = new RubyModule.KindOf() {
            public boolean isKindOf(IRubyObject obj, RubyModule type) {
                return obj instanceof RubySchema;
            }
        };

        result.includeModule(runtime.getEnumerable());

        result.defineAnnotatedMethods(RubySchema.class);

        return result;
    }

    /**
     * This constructor creates a RubySchema which encapsulates a new, empty Schema.
     * It is the constructor used by the class's ObjectAllocator.
     *
     * @param ruby an instance of the ruby runtime
     * @param rc   an instance of the class object with metadata
     */
    protected RubySchema(final Ruby ruby, RubyClass rc) {
        super(ruby,rc);
        internalSchema = new Schema();
    }

    /**
     * This constructor sets the encapsulated Schema to be equal to
     * the given Schema. If copy is true, it is set equal to a copy.
     * If it is false, it is set directly equal.
     *
     * @param ruby an instance of the ruby runtime
     * @param rc   an instance of the class object with meatadata
     * @param s    a Schema to encapsulate
     * @param copy a boolean value. If true, s will be copied and the copy
     *             will be encapsulated. If false, it will be encapsulated
     *             directly.
     */
    protected RubySchema(final Ruby ruby, RubyClass rc, Schema s, boolean copy) {
        super(ruby,rc);
        if (copy) {
            internalSchema = new Schema(s);
        } else {
            internalSchema = s;
        }
    }

    /**
     * This constructor sets the encapsulated Schema to be equal to a copy of the
     * given Schema. It delegates to the four argument constructor with copy set
     * to true, so the given Schema object itself is not encapsulated.
     *
     * @param ruby an instance of the ruby runtime
     * @param rc   an instance of the class object with metadata
     * @param s    a Schema to copy and encapsulate
     */
    protected RubySchema(final Ruby ruby, RubyClass rc, Schema s) {
        this(ruby, rc, s, true);
    }

    /**
     * This constructor is provided for convenience and sets the
     * internal Schema equal to the result of a call to
     * {@link Utils#getSchemaFromString}.
     *
     * @param ruby an instance of the ruby runtime
     * @param rc   an instance of the class object with metadata
     * @param s    a String which will be passed to
     *             {@link Utils#getSchemaFromString}
     * @throws RuntimeException if the String cannot be parsed; the ParserException
     *                          is kept as the cause
     */
    protected RubySchema(final Ruby ruby, RubyClass rc, String s) {
        super(ruby, rc);
        try {
            internalSchema = Utils.getSchemaFromString(s);
        } catch (ParserException e) {
            throw new RuntimeException("Error converting String to Schema: " + s, e);
        }
    }

    /**
     * The ruby initializer accepts any number of arguments. With no arguments,
     * it will return an empty Schema object. It can accept any number of arguments.
     * To understand the valid arguments, see the documentation for {@link #rubyArgToSchema}.
     *
     * @param args a varargs which can take any number of valid arguments to
     *             {@link #rubyArgToSchema}
     * @return     the initialized RubySchema
     */
    @JRubyMethod(rest = true)
    public RubySchema initialize(IRubyObject[] args) {
        internalSchema = new Schema();
        for (IRubyObject arg : args) {
            Schema rs = rubyArgToSchema(arg);
            for (Schema.FieldSchema i : rs.getFields())
                internalSchema.add(i);
        }
        RubySchema.fixSchemaNames(internalSchema);
        return this;
    }

    /**
     * This is a static helper method to create a null aliased bytearray Schema.
     * This is useful in cases where you do not want the output to have an explicit
     * name, which {@link Utils#getSchemaFromString} will assign.
     * It is exposed to Ruby on the class object as <code>by</code> and <code>bytearray</code>.
     *
     * @param context the context the method is being executed in
     * @param self    an instance of the RubyClass with metadata on
     *                the Ruby class object this method is being
     *                statically invoked against
     * @return        a null-aliased bytearray schema
     */
    @JRubyMethod(meta = true, name = {"by", "bytearray"})
    public static RubySchema nullBytearray(ThreadContext context, IRubyObject self) {
       return makeNullAliasRubySchema(context, DataType.BYTEARRAY);
    }

    /**
     * This is a static helper method to create a null aliased Boolean Schema.
     * This is useful in cases where you do not want the output to have an explicit
     * name, which {@link Utils#getSchemaFromString} will assign.
     * It is exposed to Ruby on the class object as <code>bool</code> and <code>boolean</code>.
     *
     * @param context the context the method is being executed in
     * @param self    an instance of the RubyClass with metadata on
     *                the Ruby class object this method is being
     *                statically invoked against
     * @return        a null-aliased boolean schema
     */
    @JRubyMethod(meta = true, name = {"bool", "boolean"})
    public static RubySchema nullBoolean(ThreadContext context, IRubyObject self) {
       return makeNullAliasRubySchema(context, DataType.BOOLEAN);
    }


    /**
     * This is a static helper method to create a null aliased chararray Schema.
     * This is useful in cases where you do not want the output to have an explicit
     * name, which {@link Utils#getSchemaFromString} will assign.
     * It is exposed to Ruby on the class object as <code>c</code> and <code>chararray</code>.
     *
     * @param context the context the method is being executed in
     * @param self    an instance of the RubyClass with metadata on
     *                the Ruby class object this method is being
     *                statically invoked against
     * @return        a null-aliased chararray schema
     */
    @JRubyMethod(meta = true, name = {"c", "chararray"})
    public static RubySchema nullChararray(ThreadContext context, IRubyObject self) {
       return makeNullAliasRubySchema(context, DataType.CHARARRAY);
    }

    /**
     * This is a static helper method to create a null aliased long Schema.
     * This is useful in cases where you do not want the output to have an explicit
     * name, which {@link Utils#getSchemaFromString} will assign.
     * It is exposed to Ruby on the class object as <code>l</code> and <code>long</code>.
     *
     * @param context the context the method is being executed in
     * @param self    an instance of the RubyClass with metadata on
     *                the Ruby class object this method is being
     *                statically invoked against
     * @return        a null-aliased long schema
     */
   @JRubyMethod(meta = true, name = {"l", "long"})
    public static RubySchema nullLong(ThreadContext context, IRubyObject self) {
       return makeNullAliasRubySchema(context, DataType.LONG);
    }

    /**
     * This is a static helper method to create a null aliased int Schema.
     * This is useful in cases where you do not want the output to have an explicit
     * name, which {@link Utils#getSchemaFromString} will assign.
     * It is exposed to Ruby on the class object as <code>i</code> and <code>int</code>.
     *
     * @param context the context the method is being executed in
     * @param self    an instance of the RubyClass with metadata on
     *                the Ruby class object this method is being
     *                statically invoked against
     * @return        a null-aliased int schema
     */
    @JRubyMethod(meta = true, name = {"i", "int"})
    public static RubySchema nullInt(ThreadContext context, IRubyObject self) {
       return makeNullAliasRubySchema(context, DataType.INTEGER);
    }

    /**
     * This is a static helper method to create a null aliased double Schema.
     * This is useful in cases where you do not want the output to have an explicit
     * name, which {@link Utils#getSchemaFromString} will assign.
     * It is exposed to Ruby on the class object as <code>d</code> and <code>double</code>.
     *
     * @param context the context the method is being executed in
     * @param self    an instance of the RubyClass with metadata on
     *                the Ruby class object this method is being
     *                statically invoked against
     * @return        a null-aliased double schema
     */
    @JRubyMethod(meta = true, name = {"d", "double"})
    public static RubySchema nullDouble(ThreadContext context, IRubyObject self) {
       return makeNullAliasRubySchema(context, DataType.DOUBLE);
    }

    /**
     * This is a static helper method to create a null aliased float Schema.
     * This is useful in cases where you do not want the output to have an explicit
     * name, which {@link Utils#getSchemaFromString} will assign.
     * It is exposed to Ruby on the class object as <code>f</code> and <code>float</code>.
     * Note that the Java method name carries a trailing "e"; the Ruby names do not.
     *
     * @param context the context the method is being executed in
     * @param self    an instance of the RubyClass with metadata on
     *                the Ruby class object this method is being
     *                statically invoked against
     * @return        a null-aliased float schema
     */
    @JRubyMethod(meta = true, name = {"f", "float"})
    public static RubySchema nullFloate(ThreadContext context, IRubyObject self) {
       return makeNullAliasRubySchema(context, DataType.FLOAT);
    }

    /**
     * This is a static helper method to create a null aliased datetime Schema.
     * This is useful in cases where you do not want the output to have an explicit
     * name, which {@link Utils#getSchemaFromString} will assign.
     * It is exposed to Ruby on the class object as <code>dt</code> and <code>datetime</code>.
     *
     * @param context the context the method is being executed in
     * @param self    an instance of the RubyClass with metadata on
     *                the Ruby class object this method is being
     *                statically invoked against
     * @return        a null-aliased datetime schema
     */
    @JRubyMethod(meta = true, name = {"dt", "datetime"})
    public static RubySchema nullDateTime(ThreadContext context, IRubyObject self) {
       return makeNullAliasRubySchema(context, DataType.DATETIME);
    }

    /**
     * This is a static helper method to create a null aliased tuple Schema.
     * This is useful in cases where you do not want the output to have an explicit
     * name, which {@link Utils#getSchemaFromString} will assign.
     * It is exposed to Ruby on the class object as <code>t</code> and <code>tuple</code>;
     * this is the form which takes no Ruby arguments, and the resulting tuple field
     * has no inner schema.
     *
     * @param context the context the method is being executed in
     * @param self    an instance of the RubyClass with metadata on
     *                the Ruby class object this method is being
     *                statically invoked against
     * @return        a null-aliased tuple schema
     */
    @JRubyMethod(meta = true, name = {"t", "tuple"})
    public static RubySchema nullTuple(ThreadContext context, IRubyObject self) {
       return makeNullAliasRubySchema(context, DataType.TUPLE);
    }

    /**
     * This is a static helper method to create a null aliased bag Schema.
     * This is useful in cases where you do not want the output to have an explicit
     * name, which {@link Utils#getSchemaFromString} will assign.
     * It is exposed to Ruby on the class object as <code>b</code> and <code>bag</code>;
     * this is the form which takes no Ruby arguments, and the resulting bag field
     * has no inner schema.
     *
     * @param context the context the method is being executed in
     * @param self    an instance of the RubyClass with metadata on
     *                the Ruby class object this method is being
     *                statically invoked against
     * @return        a null-aliased bag schema
     */
    @JRubyMethod(meta = true, name = {"b", "bag"})
    public static RubySchema nullBag(ThreadContext context, IRubyObject self) {
       return makeNullAliasRubySchema(context, DataType.BAG);
    }

    /**
     * This is a static helper method to create a null aliased map Schema.
     * This is useful in cases where you do not want the output to have an explicit
     * name, which {@link Utils#getSchemaFromString} will assign.
     * It is exposed to Ruby on the class object as <code>m</code> and <code>map</code>;
     * this is the form which takes no Ruby arguments, and the resulting map field
     * has no inner schema.
     *
     * @param context the context the method is being executed in
     * @param self    an instance of the RubyClass with metadata on
     *                the Ruby class object this method is being
     *                statically invoked against
     * @return        a null-aliased map schema
     */
    @JRubyMethod(meta = true, name = {"m", "map"})
    public static RubySchema nullMap(ThreadContext context, IRubyObject self) {
       return makeNullAliasRubySchema(context, DataType.MAP);
    }

    /**
     * This is a helper method to generate a RubySchema of the given type without an alias.
     *
     * @param context the context the method is being executed in
     * @param type    the DataType.PIGTYPE value to make the Schema from
     * @return        a RubySchema object encapsulated a Schema of the specified type
     */
    private static RubySchema makeNullAliasRubySchema(ThreadContext context, byte type) {
       Ruby runtime = context.getRuntime();
       return new RubySchema(runtime, runtime.getClass("Schema"), new Schema(new Schema.FieldSchema(null, type)));
    }

    /**
     * This is a helper function which converts objects into Schema objects. The valid
     * options are as follows:
     * <p>
     * A String or RubyString, which will have {@link Utils#getSchemaFromString} called on it, and
     * it will be added.
     * <p>
     * A RubySchema, which will be added directly. IMPORTANT NOTE: since this API abstracts
     * away from the distinction between Schema/FieldSchema, it's important to understand
     * how a Schema is added to another. In this case, the FieldSchema is pulled directly
     * out of the given Schema. Thus, where in Pig a Schema.FieldSchema might be passed around,
     * internally to this class, generally a Schema will be passed around encapsulating it.
     * Note that the encapsulated Schema itself is returned, not a copy.
     * <p>
     * A list will create the Schema for a Tuple whose elements will be the elements of the
     * list. Each element will be subjected to the same rules applied here. The tuple is
     * given the alias "tuple_0".
     * <p>
     * A hash in the form of:<br>
     * <code>{"name:tuple"=>["x:int","y:int,z:int"], "name2:bag"=>["a:chararray"]}</code><br>
     * The keys must be a tuple, bag, or map, and the value must be an array.
     *
     * @param arg an object (generally an IRubyObject or String) to convert. See above for
     *            the rules on valid arguments
     * @return    the Schema constructed for the given argument
     * @throws RuntimeException if the argument is of an unsupported type, if a hash key or
     *                          value breaks the rules above, or if an IOException is raised
     *                          during conversion (it is kept as the cause)
     */
    public static Schema rubyArgToSchema(Object arg) {
        try {
            /*
             * Given a String or a RubyString, calls {@link Utils#getSchemaFromString}.
             * Additionally, as a convenience to the user, this method uses a regex to
             * detect any case where a schema declaration of "bag", "tuple", or "map"
             * does not have the trailing "{}", "()", or "[]" that
             * {@link Utils#getSchemaFromString} requires.
             */
            if (arg instanceof String || arg instanceof RubyString) {
                String s = arg.toString();
                Matcher m = bmtPattern.matcher(s);
                // Each pass replaces one bare type keyword (group 1) with its bracket pair,
                // keeping any text before and after it, then restarts matching on the new string.
                while (m.find()) {
                    String type = m.group(1);
                    String inter = s.substring(0, m.start(1));

                    if (type.equalsIgnoreCase("bag")) {
                         inter += "{}";
                    } else if (type.equalsIgnoreCase("map")) {
                         inter += "[]";
                    } else if (type.equalsIgnoreCase("tuple")) {
                         inter += "()";
                    } else {
                        // Group 1 of bmtPattern can only be bag, map, or tuple.
                        throw new RuntimeException("Arriving here should be impossible");
                    }

                    s = inter + s.substring(m.end(1));
                    m = bmtPattern.matcher(s);
                }
                return Utils.getSchemaFromString(s);
            // In the case of a RubySchema, can just return the encapsulated Schema
            } else if (arg instanceof RubySchema) {
                return ((RubySchema)arg).getInternalSchema();
            // In the case of a RubyArray, the elements of the array are passed to this
            // method, and they will be treated as elements of a Tuple Schema.
            } else if (arg instanceof RubyArray) {
                RubyArray ary = (RubyArray)arg;
                Schema s = new Schema();
                for (Object o : ary) {
                    Schema ts = rubyArgToSchema(o);
                    for (Schema.FieldSchema fs : ts.getFields()) {
                      s.add(fs);
                    }
                }
                return new Schema(new Schema.FieldSchema("tuple_0", s, DataType.TUPLE));
            /*
             * In the case of a RubyHash, the key defines a Schema that will encapsulate
             * other elements. This mainly is for the convenience of being able to name
             * bags, maps, and tuples while easily being able to have interchangeable elements.
             * The key will be given to this method, but must return a singular map, tuple, or
             * bag, or an error will be thrown. The value to that key must be an array, and
             * each element will be passed to this method and then added to the Schema for
             * the key.
             */
            } else if (arg instanceof RubyHash) {
                RubyHash hash = (RubyHash)arg;
                Schema hashSchema = new Schema();
                for (Object o : hash.keySet()) {
                    Schema s = rubyArgToSchema(o);
                    if (s.size() != 1) {
                        throw new RuntimeException("Hash key must be singular");
                    }
                    Schema.FieldSchema fs = s.getField(0);
                    Object v = hash.get(o);
                    if (v instanceof RubyArray) {
                        byte type = fs.type;
                        if (type == DataType.BAG) {
                            // A bag keeps the whole converted array, i.e. a Schema
                            // holding the single "tuple_0" field.
                            fs.schema = rubyArgToSchema(v);
                        } else if (type == DataType.TUPLE || type == DataType.MAP) {
                            // A tuple or map takes the fields of the converted array
                            // directly, dropping the "tuple_0" wrapper.
                            fs.schema = rubyArgToSchema(v).getField(0).schema;
                        } else {
                            throw new RuntimeException("Hash key must be tuple map or bag");
                        }
                    } else {
                        throw new RuntimeException("Hash value must be an Array");
                    }
                    hashSchema.add(fs);
                }
                return hashSchema;
            } else {
                throw new RuntimeException("Bad argument given to rubyToSchema: " + arg + (arg != null ? " class type " + arg.getClass().toString() : ""));
            }
        } catch (IOException e) {
            throw new RuntimeException("Error converting ruby to Schema: " + arg, e);
        }
    }

    /**
     * This is a ruby method which takes a name and an array of arguments and constructs a Tuple schema
     * from them.
     *
     * @param context the context the method is being executed in
     * @param self    the RubyClass for the Class object this was invoked on
     * @param arg1    the name for the RubySchema
     * @param arg2    a list of arguments to instantiate the new RubySchema
     * @return        the new Tuple RubySchema
     */
    @JRubyMethod(meta = true, name = {"t", "tuple"})
    public static RubySchema tuple(ThreadContext context, IRubyObject self, IRubyObject arg1, IRubyObject arg2) {
         RubySchema rs = tuple(context, self, arg2);
         rs.setNameIf(arg1);
         return rs;
    }

    /**
     * This is a ruby method which takes an array of arguments and constructs a Tuple schema from them. The name
     * will be set automatically.
     *
     * @param context the context the method is being executed in
     * @param self    the RubyClass for the Class object this was invoked on
     * @param arg     a list of arguments to instantiate the new RubySchema
     * @return        the new RubySchema
     */
    @JRubyMethod(meta = true, name = {"t", "tuple"})
    public static RubySchema tuple(ThreadContext context, IRubyObject self, IRubyObject arg) {
        if (arg instanceof RubyArray) {
            Schema s = rubyArgToSchema(arg);
            Ruby runtime = context.getRuntime();
            return new RubySchema(runtime, runtime.getClass("Schema"), s);
        } else {
            throw new RuntimeException("Bad argument given to Schema.tuple");
        }
    }

    /**
     * This is a ruby method which takes a name and an array of arguments and constructs a Map schema
     * from them.
     *
     * @param context the context the method is being executed in
     * @param self    the RubyClass for the Class object this was invoked on
     * @param arg1    the name for the RubySchema
     * @param arg2    a list of arguments to instantiate the new RubySchema
     * @return        the new RubySchema
     */
    @JRubyMethod(meta = true, name = {"m", "map"})
    public static RubySchema map(ThreadContext context, IRubyObject self, IRubyObject arg1, IRubyObject arg2) {
         RubySchema rs = map(context, self, arg2);
         rs.setNameIf(arg1);
         return rs;
    }

    /**
     * This is a ruby method which takes an array of arguments and constructs a Map schema from them. The name
     * will be set automatically.
     *
     * @param context the context the method is being executed in
     * @param self    the RubyClass for the Class object this was invoked on
     * @param arg     a list of arguments to instantiate the new RubySchema
     * @return        the new RubySchema
     */
    @JRubyMethod(meta = true, name = {"m", "map"})
    public static RubySchema map(ThreadContext context, IRubyObject self, IRubyObject arg) {
        Schema s = tuple(context, self, arg).getInternalSchema();
        Ruby runtime = context.getRuntime();
        try {
            return new RubySchema(runtime, runtime.getClass("Schema"), new Schema(new Schema.FieldSchema("map_0", s.getField(0).schema, DataType.MAP)));
        } catch (FrontendException e) {
            throw new RuntimeException("Error making map", e);
        }
    }

    /**
     * This is a ruby method which takes a name and an array of arguments and constructs a Bag schema
     * from them.
     *
     * @param context the context the method is being executed in
     * @param self    the RubyClass for the Class object this was invoked on
     * @param arg1    the name for the RubySchema
     * @param arg2    a list of arguments to instantiate the new RubySchema
     * @return        the new RubySchema
     */
    @JRubyMethod(meta = true, name={"b", "bag"})
    public static RubySchema bag(ThreadContext context, IRubyObject self, IRubyObject arg1, IRubyObject arg2) {
         RubySchema rs = bag(context, self, arg2);
         rs.setNameIf(arg1);
         return rs;
    }

    /**
     * This is a ruby method which takes an array of arguments and constructs a Bag schema from them. The name
     * will be set automatically.
     *
     * @param context the context the method is being executed in
     * @param self    the RubyClass for the Class object this was invoked on
     * @param arg     a list of arguments to instantiate the new RubySchema
     * @return        the new RubySchema
     */
    @JRubyMethod(meta = true, name = {"b", "bag"})
    public static RubySchema bag(ThreadContext context, IRubyObject self, IRubyObject arg) {
        Schema s = tuple(context, self, arg).getInternalSchema();
        Ruby runtime = context.getRuntime();
        try {
            return new RubySchema(runtime, runtime.getClass("Schema"), new Schema(new Schema.FieldSchema("bag_0", s, DataType.BAG)));
        } catch (FrontendException e) {
            throw new RuntimeException("Error making map", e);
        }
    }

    /**
     * This method will fix any name conflicts in a schema. It's important to note that
     * this will change the Schema object itself. It will deal with any collisions in things
     * named tuple_#, bag_#, map_#, or val_#, as these are generally names generated by
     * Util.getSchemaFromString. In the case of another name conflict, it will not be
     * changed, as that name conflict was created by the user.
     *
     * @param s a Schema object to fix in place
     */
    private static void fixSchemaNames(Schema s) {
        if (s == null)
             return;
        // This regex detects names that could possibly collide that we should change
        Pattern p = Pattern.compile("(bag_|tuple_|map_|val_)(\\d+)", Pattern.CASE_INSENSITIVE);
        Set<String> names = new HashSet<String>(s.size(), 1.0f);
        for (Schema.FieldSchema fs : s.getFields()) {
            if (fs.alias == null)
                 continue;
            Matcher m = p.matcher(fs.alias);
            if (m.matches() && names.contains(fs.alias)) {
                String prefix = m.group(1);
                int suffix = Integer.parseInt(m.group(2));
                while (names.contains(prefix + suffix))
                    suffix++;
                fs.alias = prefix + suffix;
            }
            names.add(fs.alias);
            if (fs.schema != null) {
                if (fs.type == DataType.BAG) {
                    try {
                        fixSchemaNames(fs.schema.getField(0).schema);
                    } catch (FrontendException e) {
                        throw new RuntimeException("Error recursively fixing schema: " + s, e);
                    }
                } else {
                    fixSchemaNames(fs.schema);
                }
            }
        }
    }

    /**
     * This is just a convenience method which sets the name of the internalSchema to the argument that was given.
     *
     * @param arg a RubyString to set the name of the encapsulated Schema object
     */
    private void setNameIf(IRubyObject arg) {
        if (arg instanceof RubyString) {
            setName(arg.toString());
        } else {
            throw new RuntimeException("Bad name given");
        }
    }

    /**
     * This method sets the name of a RubySchema to the name given. It's important to note that
     * if the RubySchema represents anything other than a tuple, databag, or map then an error
     * will be thrown.
     *
     * @param name a String to set the name of the encapsulated Schema object
     */
    private void setName(String name) {
        Schema.FieldSchema fs;

        try {
            fs = internalSchema.getField(0);
        } catch (FrontendException e) {
            throw new RuntimeException("Error getting field from schema: " + internalSchema, e);
        }

        byte type = fs.type;

        if (type == DataType.TUPLE || type == DataType.BAG || type == DataType.MAP) {
            fs.alias = name;
        } else {
            throw new RuntimeException("setName cannot be set on Schema: " + internalSchema);
        }
    }

    /**
     * The toString method just leverages Schema's printing.
     *
     * @param context the context the method is being executed in
     * @return        a String representation of the encapsulated Schema object
     */
    @JRubyMethod(name = {"to_s", "inspect"})
    public RubyString toString(ThreadContext context) {
        return RubyString.newString(context.getRuntime(), internalSchema.toString());
    }

    /**
     * This is the ruby method which allows people to access elements of the RubySchema object.
     * It can be given either a single numeric index, or a Range object to specify a range of indices.
     * It's important to note that the Schema object returned from this references the Schema stored
     * internally, so if the user wants to make changes without affecting this object, it must be cloned.
     *
     * @param context the context the method is being executed in
     * @param arg     a Fixnum index, Range object to specify a range of values to return, or
     *                a String to look up by alias name
     * @return        the RubySchema object encapsulated the found Schema
     */
    @JRubyMethod(name = {"[]", "slice"})
    public RubySchema get(ThreadContext context, IRubyObject arg) {
        Ruby runtime = context.getRuntime();
        if (arg instanceof RubyFixnum) {
            int index = (int)((RubyFixnum)arg).getLongValue();
            Schema s;
            try {
                s = new Schema(internalSchema.getField(index));
            } catch (FrontendException e) {
                throw new RuntimeException("Invalid index given to get function: " + index, e);
            }
            return new RubySchema(runtime, runtime.getClass("Schema"), s, false); //returns the actual object itself
        } else if (arg instanceof RubyRange) {
            int min = (int)((RubyFixnum)((RubyRange)arg).min(context, Block.NULL_BLOCK)).getLongValue();
            int max = (int)((RubyFixnum)((RubyRange)arg).max(context, Block.NULL_BLOCK)).getLongValue();
            return new RubySchema(runtime, runtime.getClass("Schema"), new Schema(internalSchema.getFields().subList(min, max + 1)), false);
        } else if (arg instanceof RubyString) {
             try {
                 return new RubySchema(runtime, runtime.getClass("Schema"), new Schema(internalSchema.getField(arg.toString())), false);
             } catch (FrontendException e) {
                 throw new RuntimeException("Unable to find field " + arg.toString() + " in schema " + internalSchema, e);
             }
        } else {
            throw new RuntimeException("Invalid argument given to get function: " + arg.toString());
        }
    }

    /**
     * This is a version of [] which allows the range to be specified as such: [1,2].
     *
     * @param context the context the method is being executed in
     * @param arg1    a Fixnum start index
     * @param arg2    a Fixnum end index
     * @return        the RubySchema object encapsulated the found Schema
     */
    @JRubyMethod(name = {"[]", "slice"})
    public RubySchema get(ThreadContext context, IRubyObject arg1, IRubyObject arg2) {
        if (arg1 instanceof RubyFixnum && arg2 instanceof RubyFixnum) {
            Ruby runtime = context.getRuntime();
            int min = (int)((RubyFixnum)arg1).getLongValue();
            int max = (int)((RubyFixnum)arg2).getLongValue() - 1;
            return new RubySchema(runtime, runtime.getClass("Schema"), new Schema(internalSchema.getFields().subList(min, max + 1)), false);
        } else {
            throw new RuntimeException("Bad arguments given to get function: ( " + arg1.toString() + " , " + arg2.toString()+ " )");
        }
    }

    /**
     * This allows the users to set an index or a range of values to
     * a specified RubySchema. The first argument must be a Fixnum or Range,
     * and the second argument may optionally be a Fixnum. The given index
     * (or range of indices) will be replaced by a RubySchema instantiated
     * based on the remaining arguments.
     *
     * @param context the contextthe method is being executed in
     * @param args    a varargs which has to be at least length two.
     * @return        the RubySchema that was added
     */
    @JRubyMethod(name = {"[]=", "set"}, required = 2, rest = true)
    public RubySchema set(ThreadContext context, IRubyObject[] args) {
        IRubyObject arg1 = args[0];
        IRubyObject arg2 = args[1];
        IRubyObject[] arg3 = Arrays.copyOfRange(args, 1, args.length);
        Schema s = internalSchema;
        Ruby runtime = context.getRuntime();
        List<Schema.FieldSchema> lfs = s.getFields();
        int min, max;
        if (arg1 instanceof RubyFixnum && arg2 instanceof RubyFixnum) {
            min = (int)((RubyFixnum)arg1).getLongValue();
            max = (int)((RubyFixnum)arg2).getLongValue();
            arg3 = Arrays.copyOfRange(args, 2, args.length);
        } else if (arg1 instanceof RubyFixnum) {
            min = (int)((RubyFixnum)arg1).getLongValue();
            max = min + 1;
        } else if (arg1 instanceof RubyRange) {
            min = (int)((RubyFixnum)((RubyRange)arg1).min(context, Block.NULL_BLOCK)).getLongValue();
            max = (int)((RubyFixnum)((RubyRange)arg1).max(context, Block.NULL_BLOCK)).getLongValue() + 1;
        } else {
            throw new RuntimeException("Bad arguments given to get function: ( " + arg1.toString() + " , " + arg2.toString()+ " )");
        }
        for (int i = min; i < max; i++)
            lfs.remove(min);
        if (arg3 == null || arg3.length == 0)
            throw new RuntimeException("Must have schema argument for []=");
        RubySchema rs = new RubySchema(runtime, runtime.getClass("Schema")).initialize(arg3);
        for (Schema.FieldSchema fs : rs.getInternalSchema().getFields())
            lfs.add(min++, fs);
        RubySchema.fixSchemaNames(internalSchema);
        return rs;
    }

    /**
     * This method provides addition semantics, without modifying the original Schema.
     * This method can be given any number of arguments, much as with the constructor.
     *
     * @param context the context the method is being executed in
     * @param args    a varargs which can be any valid set of arguments that
     *                can initialize a RubySchema
     * @return        the Rresult of the addition
     */
    @JRubyMethod(name = {"add", "+"}, rest = true)
    public RubySchema add(ThreadContext context, IRubyObject[] args) {
        RubySchema rsClone = clone(context);
        rsClone.addInPlace(context, args);
        return rsClone;
    }

    /**
     * This method provides addition semantics, modifying the original Schema in place.
     * This method can be given any number of arguments, much as with the constructor.
     *
     * @param context the context the method is being executed in
     * @param args    a varargs which can be any valid set of arguments that
     *                can initialize a RubySchema
     */
    @JRubyMethod(name = "add!", rest = true)
    public void addInPlace(ThreadContext context, IRubyObject[] args) {
        Ruby runtime = context.getRuntime();
        List<Schema.FieldSchema> lfs = internalSchema.getFields();
        RubySchema rs = new RubySchema(runtime, runtime.getClass("Schema")).initialize(args);
        for (Schema.FieldSchema fs : rs.getInternalSchema().getFields())
            lfs.add(fs);
        RubySchema.fixSchemaNames(internalSchema);
    }

    /**
     * @param context the context the method is being executed in
     * @return        a RubySchema copy of the Schema
     */
    @JRubyMethod
    public RubySchema clone(ThreadContext context) {
        Ruby runtime = context.getRuntime();
        return new RubySchema(runtime, runtime.getClass("Schema"), internalSchema);
    }

    /**
     * Given a field name this string will search the RubySchema for a FieldSchema
     * with that name and return it encapsulated in a Schema.
     *
     * @param context the context the method is being executed in
     * @param arg     a RubyString serving as an alias to look
     *                for in the Schema
     * @return        the found RubySchema
     */
    @JRubyMethod
    public RubySchema find(ThreadContext context, IRubyObject arg) {
        if (arg instanceof RubyString) {
            Ruby runtime = context.getRuntime();
            return new RubySchema(runtime, runtime.getClass("Schema"), RubySchema.find(internalSchema, arg.toString()), false);
        } else {
            throw new RuntimeException("Invalid arguement passed to find: " + arg);
        }
    }

    /**
     * This is a helper method which recursively searches for an alias in the Schema
     * encapsulated by RubySchema. This is necessary because findFieldSchema uses
     * canonicalName, not name.
     *
     * @param s     the Schema to search through
     * @param alias
     * @return      the found RubySchema
     */
    private static Schema find(Schema s, String alias) {
        for (Schema.FieldSchema fs : s.getFields())
            if (alias.equals(fs.alias))
                 return new Schema(fs);
        for (Schema.FieldSchema fs : s.getFields())
            if (fs.schema != null) {
                 Schema r = RubySchema.find(fs.schema, alias);
                 if (r != null)
                     return r;
            }
        return new Schema();
    }

    /**
     * Given a field name, this will return the index of it in the schema.
     *
     * @param context the context the method is being executed in
     * @param arg     a field name to look for
     * @return        the index for that field name
     */
    @JRubyMethod
    public RubyFixnum index(ThreadContext context, IRubyObject arg) {
        if (arg instanceof RubyString) {
            try {
                return new RubyFixnum(context.getRuntime(), internalSchema.getPosition(arg.toString()));
            } catch (FrontendException e) {
                throw new RuntimeException("Unable to find position for argument: " + arg);
            }
        } else {
            throw new RuntimeException("Invalid arguement passed to index: " + arg);
        }
    }

    /**
     * @param context the context the method is being executed in
     * @return        the size of the encapsulated Schema
     */
    @JRubyMethod(name = {"size", "length"})
    public RubyFixnum size(ThreadContext context) {
        return new RubyFixnum(context.getRuntime(), internalSchema.size());
    }

    /**
     * This is a helper method to pull out the native Java type from the ruby object.
     *
     * @return the encapsulated Schema
     */
    public Schema getInternalSchema() {
        return internalSchema;
    }

    /**
     * This method allows access into the Schema nested in the encapsulated Schema. For example,
     * if the encapsulated Schema is a bag Schema, this allows the user to access the schema of
     * the interior Tuple.
     *
     * @param context the context the method is being executed in
     * @return        a RubySchema encapsulating the nested Schema
     */
    @JRubyMethod(name = {"get", "inner", "in"})
    public RubySchema get(ThreadContext context) {
        if (internalSchema.size() != 1)
            throw new RuntimeException("Can only return nested schema if there is one schema to get");
        Ruby runtime = context.getRuntime();
        try {
            return new RubySchema(runtime, runtime.getClass("Schema"), internalSchema.getField(0).schema, false);
        } catch (FrontendException e) {
            throw new RuntimeException("Schema does not have a nested FieldScema", e);
        }
    }

    /**
     * This method allows the user to see the name of the alias of the FieldSchema of the encapsulated
     * Schema. This method only works if the Schema has one FieldSchema.
     *
     * @param context the context the method is being executed in
     * @return        the name of the Schema
     */
    @JRubyMethod(name = "name")
    public RubyString getName(ThreadContext context) {
        try {
            if (internalSchema.size() != 1)
                 throw new RuntimeException("Can only get name if there is one schema present");

            return RubyString.newString(context.getRuntime(), internalSchema.getField(0).alias);
        } catch (FrontendException e) {
            throw new RuntimeException("Unable to get field from Schema", e);
        }
    }

    /**
     * This method allows the user to set the name of the alias of the FieldSchema of the encapsulated
     * Schema. This method only works if the Schema has one FieldSchema.
     *
     * @param arg a RubyString to set the name to
     * @return    the new name
     */
    @JRubyMethod(name = "name=")
    public RubyString setName(IRubyObject arg) {
        if (arg instanceof RubyString) {
             if (internalSchema.size() != 1)
                 throw new RuntimeException("Can only set name if there is one schema present");
             try {
                 internalSchema.getField(0).alias = arg.toString();
                 return (RubyString)arg;
             } catch (FrontendException e) {
                 throw new RuntimeException("Unable to get field from Schema", e);
             }
        } else {
             throw new RuntimeException("Improper argument passed to 'name=':" + arg);
        }
    }
}
TOP

Related Classes of org.apache.pig.scripting.jruby.RubySchema

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.