Source Code of org.apache.tajo.datum.protobuf.ProtobufJsonFormat

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package org.apache.tajo.datum.protobuf;


import com.google.protobuf.*;
import com.google.protobuf.Descriptors.Descriptor;
import com.google.protobuf.Descriptors.EnumDescriptor;
import com.google.protobuf.Descriptors.EnumValueDescriptor;
import com.google.protobuf.Descriptors.FieldDescriptor;


import java.io.IOException;
import java.math.BigInteger;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


public class ProtobufJsonFormat extends AbstractCharBasedFormatter {
  private final static ProtobufJsonFormat instance;


  static {
    instance = new ProtobufJsonFormat();
  }


  public static ProtobufJsonFormat getInstance() {
    return instance;
  }


  private ProtobufJsonFormat() {
    super();
  }


  /**
   * Outputs a textual representation of the Protocol Message supplied into the parameter output.
   * (This representation is the new version of the classic "ProtocolPrinter" output from the
   * original Protocol Buffer system)
   */
  public void print(final Message message, Appendable output) throws IOException {
    JsonGenerator generator = new JsonGenerator(output);
    generator.print("{");
    print(message, generator);
    generator.print("}");
  }


  /**
   * Outputs a textual representation of {@code fields} to {@code output}.
   */
  public void print(final UnknownFieldSet fields, Appendable output) throws IOException {
    JsonGenerator generator = new JsonGenerator(output);
    generator.print("{");
    printUnknownFields(fields, generator);
    generator.print("}");
  }




  protected void print(Message message, JsonGenerator generator) throws IOException {


    for (Iterator<Map.Entry<FieldDescriptor, Object>> iter = message.getAllFields().entrySet().iterator(); iter.hasNext();) {
      Map.Entry<FieldDescriptor, Object> field = iter.next();
      printField(field.getKey(), field.getValue(), generator);
      if (iter.hasNext()) {
        generator.print(",");
      }
    }
    if (message.getUnknownFields().asMap().size() > 0)
      generator.print(", ");
    printUnknownFields(message.getUnknownFields(), generator);
  }


  public void printField(FieldDescriptor field, Object value, JsonGenerator generator) throws IOException {


    printSingleField(field, value, generator);
  }


  private void printSingleField(FieldDescriptor field,
                                Object value,
                                JsonGenerator generator) throws IOException {
    if (field.isExtension()) {
      generator.print("\"");
      // We special-case MessageSet elements for compatibility with proto1.
      if (field.getContainingType().getOptions().getMessageSetWireFormat()
          && (field.getType() == FieldDescriptor.Type.MESSAGE) && (field.isOptional())
          // object equality
          && (field.getExtensionScope() == field.getMessageType())) {
        generator.print(field.getMessageType().getFullName());
      } else {
        generator.print(field.getFullName());
      }
      generator.print("\"");
    } else {
      generator.print("\"");
      if (field.getType() == FieldDescriptor.Type.GROUP) {
        // Groups must be serialized with their original capitalization.
        generator.print(field.getMessageType().getName());
      } else {
        generator.print(field.getName());
      }
      generator.print("\"");
    }


    // Done with the name, on to the value


    if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
      generator.print(": ");
      generator.indent();
    } else {
      generator.print(": ");
    }




    if (field.isRepeated()) {
      // Repeated field. Print each element.
      generator.print("[");
      for (Iterator<?> iter = ((List<?>) value).iterator(); iter.hasNext();) {
        printFieldValue(field, iter.next(), generator);
        if (iter.hasNext()) {
          generator.print(",");
        }
      }
      generator.print("]");
    } else {
      printFieldValue(field, value, generator);
      if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
        generator.outdent();
      }
    }
  }


  private void printFieldValue(FieldDescriptor field, Object value, JsonGenerator generator) throws IOException {
    switch (field.getType()) {
      case INT32:
      case INT64:
      case SINT32:
      case SINT64:
      case SFIXED32:
      case SFIXED64:
      case FLOAT:
      case DOUBLE:
      case BOOL:
        // Good old toString() does what we want for these types.
        generator.print(value.toString());
        break;


      case UINT32:
      case FIXED32:
        generator.print(TextUtils.unsignedToString((Integer) value));
        break;


      case UINT64:
      case FIXED64:
        generator.print(TextUtils.unsignedToString((Long) value));
        break;


      case STRING:
        generator.print("\"");
        generator.print(escapeText((String) value));
        generator.print("\"");
        break;


      case BYTES: {
        generator.print("\"");
        generator.print(escapeBytes((ByteString) value));
        generator.print("\"");
        break;
      }


      case ENUM: {
        generator.print("\"");
        generator.print(((EnumValueDescriptor) value).getName());
        generator.print("\"");
        break;
      }


      case MESSAGE:
      case GROUP:
        generator.print("{");
        print((Message) value, generator);
        generator.print("}");
        break;
    }
  }


  protected void printUnknownFields(UnknownFieldSet unknownFields, JsonGenerator generator) throws IOException {
    boolean firstField = true;
    for (Map.Entry<Integer, UnknownFieldSet.Field> entry : unknownFields.asMap().entrySet()) {
      UnknownFieldSet.Field field = entry.getValue();


      if (firstField) {firstField = false;}
      else {generator.print(", ");}


      generator.print("\"");
      generator.print(entry.getKey().toString());
      generator.print("\"");
      generator.print(": [");


      boolean firstValue = true;
      for (long value : field.getVarintList()) {
        if (firstValue) {firstValue = false;}
        else {generator.print(", ");}
        generator.print(TextUtils.unsignedToString(value));
      }
      for (int value : field.getFixed32List()) {
        if (firstValue) {firstValue = false;}
        else {generator.print(", ");}
        generator.print(String.format((Locale) null, "0x%08x", value));
      }
      for (long value : field.getFixed64List()) {
        if (firstValue) {firstValue = false;}
        else {generator.print(", ");}
        generator.print(String.format((Locale) null, "0x%016x", value));
      }
      for (ByteString value : field.getLengthDelimitedList()) {
        if (firstValue) {firstValue = false;}
        else {generator.print(", ");}
        generator.print("\"");
        generator.print(escapeBytes(value));
        generator.print("\"");
      }
      for (UnknownFieldSet value : field.getGroupList()) {
        if (firstValue) {firstValue = false;}
        else {generator.print(", ");}
        generator.print("{");
        printUnknownFields(value, generator);
        generator.print("}");
      }
      generator.print("]");
    }
  }








  /**
   * An inner class for writing text to the output stream.
   */
  protected static class JsonGenerator {


    Appendable output;
    boolean atStartOfLine = true;
    StringBuilder indent = new StringBuilder();


    public JsonGenerator(Appendable output) {
      this.output = output;
    }


    /**
     * Indent text by two spaces. After calling Indent(), two spaces will be inserted at the
     * beginning of each line of text. Indent() may be called multiple times to produce deeper
     * indents.
     */
    public void indent() {
      indent.append("  ");
    }


    /**
     * Reduces the current indent level by two spaces, or crashes if the indent level is zero.
     */
    public void outdent() {
      int length = indent.length();
      if (length == 0) {
        throw new IllegalArgumentException(" Outdent() without matching Indent().");
      }
      indent.delete(length - 2, length);
    }


    /**
     * Print text to the output stream.
     */
    public void print(CharSequence text) throws IOException {
      int size = text.length();
      int pos = 0;


      for (int i = 0; i < size; i++) {
        if (text.charAt(i) == '\n') {
          write(text.subSequence(pos, size), i - pos + 1);
          pos = i + 1;
          atStartOfLine = true;
        }
      }
      write(text.subSequence(pos, size), size - pos);
    }


    private void write(CharSequence data, int size) throws IOException {
      if (size == 0) {
        return;
      }
      if (atStartOfLine) {
        atStartOfLine = false;
        output.append(indent);
      }
      output.append(data);
    }
  }


  // =================================================================
  // Parsing


  /**
   * Represents a stream of tokens parsed from a {@code String}.
   * <p/>
   * <p>
   * The Java standard library provides many classes that you might think would be useful for
   * implementing this, but aren't. For example:
   * <p/>
   * <ul>
   * <li>{@code java.io.StreamTokenizer}: This almost does what we want -- or, at least, something
   * that would get us close to what we want -- except for one fatal flaw: It automatically
   * un-escapes strings using Java escape sequences, which do not include all the escape sequences
   * we need to support (e.g. '\x').
   * <li>{@code java.util.Scanner}: This seems like a great way at least to parse regular
   * expressions out of a stream (so we wouldn't have to load the entire input into a single
   * string before parsing). Sadly, {@code Scanner} requires that tokens be delimited with some
   * delimiter. Thus, although the text "foo:" should parse to two tokens ("foo" and ":"), {@code
   * Scanner} would recognize it only as a single token. Furthermore, {@code Scanner} provides no
   * way to inspect the contents of delimiters, making it impossible to keep track of line and
   * column numbers.
   * </ul>
   * <p/>
   * <p>
   * Luckily, Java's regular expression support does manage to be useful to us. (Barely: We need
   * {@code Matcher.usePattern()}, which is new in Java 1.5.) So, we can use that, at least.
   * Unfortunately, this implies that we need to have the entire input in one contiguous string.
   */
  protected static class Tokenizer {


    private final CharSequence text;
    private final Matcher matcher;
    private String currentToken;


    // The character index within this.text at which the current token begins.
    private int pos = 0;


    // The line and column numbers of the current token.
    private int line = 0;
    private int column = 0;


    // The line and column numbers of the previous token (allows throwing
    // errors *after* consuming).
    private int previousLine = 0;
    private int previousColumn = 0;


    // We use possesive quantifiers (*+ and ++) because otherwise the Java
    // regex matcher has stack overflows on large inputs.
    private static final Pattern WHITESPACE =
        Pattern.compile("(\\s|(#.*$))++", Pattern.MULTILINE);
    private static final Pattern TOKEN = Pattern.compile(
        "[a-zA-Z_][0-9a-zA-Z_+-]*+|" +                // an identifier
            "[.]?[0-9+-][0-9a-zA-Z_.+-]*+|" +             // a number
            "\"([^\"\n\\\\]|\\\\.)*+(\"|\\\\?$)|" +       // a double-quoted string
            "\'([^\'\n\\\\]|\\\\.)*+(\'|\\\\?$)",         // a single-quoted string
        Pattern.MULTILINE);


    private static final Pattern DOUBLE_INFINITY = Pattern.compile(
        "-?inf(inity)?",
        Pattern.CASE_INSENSITIVE);
    private static final Pattern FLOAT_INFINITY = Pattern.compile(
        "-?inf(inity)?f?",
        Pattern.CASE_INSENSITIVE);
    private static final Pattern FLOAT_NAN = Pattern.compile(
        "nanf?",
        Pattern.CASE_INSENSITIVE);


    /**
     * Construct a tokenizer that parses tokens from the given text.
     */
    public Tokenizer(CharSequence text) {
      this.text = text;
      matcher = WHITESPACE.matcher(text);
      skipWhitespace();
      nextToken();
    }


    /**
     * Are we at the end of the input?
     */
    public boolean atEnd() {
      return currentToken.length() == 0;
    }


    /**
     * Advance to the next token.
     */
    public void nextToken() {
      previousLine = line;
      previousColumn = column;


      // Advance the line counter to the current position.
      while (pos < matcher.regionStart()) {
        if (text.charAt(pos) == '\n') {
          ++line;
          column = 0;
        } else {
          ++column;
        }
        ++pos;
      }


      // Match the next token.
      if (matcher.regionStart() == matcher.regionEnd()) {
        // EOF
        currentToken = "";
      } else {
        matcher.usePattern(TOKEN);
        if (matcher.lookingAt()) {
          currentToken = matcher.group();
          matcher.region(matcher.end(), matcher.regionEnd());
        } else {
          // Take one character.
          currentToken = String.valueOf(text.charAt(pos));
          matcher.region(pos + 1, matcher.regionEnd());
        }


        skipWhitespace();
      }
    }


    /**
     * Skip over any whitespace so that the matcher region starts at the next token.
     */
    private void skipWhitespace() {
      matcher.usePattern(WHITESPACE);
      if (matcher.lookingAt()) {
        matcher.region(matcher.end(), matcher.regionEnd());
      }
    }


    /**
     * If the next token exactly matches {@code token}, consume it and return {@code true}.
     * Otherwise, return {@code false} without doing anything.
     */
    public boolean tryConsume(String token) {
      if (currentToken.equals(token)) {
        nextToken();
        return true;
      } else {
        return false;
      }
    }


    /**
     * If the next token exactly matches {@code token}, consume it. Otherwise, throw a
     * {@link ParseException}.
     */
    public void consume(String token) throws ParseException {
      if (!tryConsume(token)) {
        throw parseException("Expected \"" + token + "\".");
      }
    }


    /**
     * Returns {@code true} if the next token is an integer, but does not consume it.
     */
    public boolean lookingAtInteger() {
      if (currentToken.length() == 0) {
        return false;
      }


      char c = currentToken.charAt(0);
      return (('0' <= c) && (c <= '9')) || (c == '-') || (c == '+');
    }


    /**
     * Returns {@code true} if the next token is a boolean (true/false), but does not consume it.
     */
    public boolean lookingAtBoolean() {
      if (currentToken.length() == 0) {
        return false;
      }


      return ("true".equals(currentToken) || "false".equals(currentToken));
    }


    /**
     * @return currentToken to which the Tokenizer is pointing.
     */
    public String currentToken() {
      return currentToken;
    }


    /**
     * If the next token is an identifier, consume it and return its value. Otherwise, throw a
     * {@link ParseException}.
     */
    public String consumeIdentifier() throws ParseException {
      for (int i = 0; i < currentToken.length(); i++) {
        char c = currentToken.charAt(i);
        if ((('a' <= c) && (c <= 'z')) || (('A' <= c) && (c <= 'Z'))
            || (('0' <= c) && (c <= '9')) || (c == '_') || (c == '.') || (c == '"')) {
          // OK
        } else {
          throw parseException("Expected identifier. -" + c);
        }
      }


      String result = currentToken;
      // Need to clean-up result to remove quotes of any kind
      result = result.replaceAll("\"|'", "");
      nextToken();
      return result;
    }


    /**
     * If the next token is a 32-bit signed integer, consume it and return its value. Otherwise,
     * throw a {@link ParseException}.
     */
    public int consumeInt32() throws ParseException {
      try {
        int result = parseInt32(currentToken);
        nextToken();
        return result;
      } catch (NumberFormatException e) {
        throw integerParseException(e);
      }
    }


    /**
     * If the next token is a 32-bit unsigned integer, consume it and return its value.
     * Otherwise, throw a {@link ParseException}.
     */
    public int consumeUInt32() throws ParseException {
      try {
        int result = parseUInt32(currentToken);
        nextToken();
        return result;
      } catch (NumberFormatException e) {
        throw integerParseException(e);
      }
    }


    /**
     * If the next token is a 64-bit signed integer, consume it and return its value. Otherwise,
     * throw a {@link ParseException}.
     */
    public long consumeInt64() throws ParseException {
      try {
        long result = parseInt64(currentToken);
        nextToken();
        return result;
      } catch (NumberFormatException e) {
        throw integerParseException(e);
      }
    }


    /**
     * If the next token is a 64-bit unsigned integer, consume it and return its value.
     * Otherwise, throw a {@link ParseException}.
     */
    public long consumeUInt64() throws ParseException {
      try {
        long result = parseUInt64(currentToken);
        nextToken();
        return result;
      } catch (NumberFormatException e) {
        throw integerParseException(e);
      }
    }


    /**
     * If the next token is a double, consume it and return its value. Otherwise, throw a
     * {@link ParseException}.
     */
    public double consumeDouble() throws ParseException {
      // We need to parse infinity and nan separately because
      // Double.parseDouble() does not accept "inf", "infinity", or "nan".
      if (DOUBLE_INFINITY.matcher(currentToken).matches()) {
        boolean negative = currentToken.startsWith("-");
        nextToken();
        return negative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
      }
      if (currentToken.equalsIgnoreCase("nan")) {
        nextToken();
        return Double.NaN;
      }
      try {
        double result = Double.parseDouble(currentToken);
        nextToken();
        return result;
      } catch (NumberFormatException e) {
        throw floatParseException(e);
      }
    }


    /**
     * If the next token is a float, consume it and return its value. Otherwise, throw a
     * {@link ParseException}.
     */
    public float consumeFloat() throws ParseException {
      // We need to parse infinity and nan separately because
      // Float.parseFloat() does not accept "inf", "infinity", or "nan".
      if (FLOAT_INFINITY.matcher(currentToken).matches()) {
        boolean negative = currentToken.startsWith("-");
        nextToken();
        return negative ? Float.NEGATIVE_INFINITY : Float.POSITIVE_INFINITY;
      }
      if (FLOAT_NAN.matcher(currentToken).matches()) {
        nextToken();
        return Float.NaN;
      }
      try {
        float result = Float.parseFloat(currentToken);
        nextToken();
        return result;
      } catch (NumberFormatException e) {
        throw floatParseException(e);
      }
    }


    /**
     * If the next token is a boolean, consume it and return its value. Otherwise, throw a
     * {@link ParseException}.
     */
    public boolean consumeBoolean() throws ParseException {
      if (currentToken.equals("true")) {
        nextToken();
        return true;
      } else if (currentToken.equals("false")) {
        nextToken();
        return false;
      } else {
        throw parseException("Expected \"true\" or \"false\".");
      }
    }


    /**
     * If the next token is a string, consume it and return its (unescaped) value. Otherwise,
     * throw a {@link ParseException}.
     */
    public String consumeString() throws ParseException {
      char quote = currentToken.length() > 0 ? currentToken.charAt(0) : '\0';
      if ((quote != '\"') && (quote != '\'')) {
        throw parseException("Expected string.");
      }


      if ((currentToken.length() < 2)
          || (currentToken.charAt(currentToken.length() - 1) != quote)) {
        throw parseException("String missing ending quote.");
      }


      try {
        String escaped = currentToken.substring(1, currentToken.length() - 1);
        String result = unescapeText(escaped);
        nextToken();
        return result;
      } catch (InvalidEscapeSequence e) {
        throw parseException(e.getMessage());
      }
    }


    /**
     * If the next token is a string, consume it, unescape it as a
     * {@link com.googlecode.protobuf.format.ByteString}, and return it. Otherwise, throw a
     * {@link ParseException}.
     */
    public ByteString consumeByteString() throws ParseException {
      char quote = currentToken.length() > 0 ? currentToken.charAt(0) : '\0';
      if ((quote != '\"') && (quote != '\'')) {
        throw parseException("Expected string.");
      }


      if ((currentToken.length() < 2)
          || (currentToken.charAt(currentToken.length() - 1) != quote)) {
        throw parseException("String missing ending quote.");
      }


      try {
        String escaped = currentToken.substring(1, currentToken.length() - 1);
        ByteString result = unescapeBytes(escaped);
        nextToken();
        return result;
      } catch (InvalidEscapeSequence e) {
        throw parseException(e.getMessage());
      }
    }


    /**
     * Returns a {@link ParseException} with the current line and column numbers in the
     * description, suitable for throwing.
     */
    public ParseException parseException(String description) {
      // Note: People generally prefer one-based line and column numbers.
      return new ParseException((line + 1) + ":" + (column + 1) + ": " + description);
    }


    /**
     * Returns a {@link ParseException} with the line and column numbers of the previous token
     * in the description, suitable for throwing.
     */
    public ParseException parseExceptionPreviousToken(String description) {
      // Note: People generally prefer one-based line and column numbers.
      return new ParseException((previousLine + 1) + ":" + (previousColumn + 1) + ": "
          + description);
    }


    /**
     * Constructs an appropriate {@link ParseException} for the given {@code
     * NumberFormatException} when trying to parse an integer.
     */
    private ParseException integerParseException(NumberFormatException e) {
      return parseException("Couldn't parse integer: " + e.getMessage());
    }


    /**
     * Constructs an appropriate {@link ParseException} for the given {@code
     * NumberFormatException} when trying to parse a float or double.
     */
    private ParseException floatParseException(NumberFormatException e) {
      return parseException("Couldn't parse number: " + e.getMessage());
    }
  }


  /**
   * Thrown when parsing an invalid text format message.
   */
  public static class ParseException extends IOException {


    private static final long serialVersionUID = 1L;


    public ParseException(String message) {
      super(message);
    }
  }




  /**
   * Parse a text-format message from {@code input} and merge the contents into {@code builder}.
   * Extensions will be recognized if they are registered in {@code extensionRegistry}.
   */
  public void merge(CharSequence input,
                    ExtensionRegistry extensionRegistry,
                    Message.Builder builder) throws ParseException {
    Tokenizer tokenizer = new Tokenizer(input);


    // Based on the state machine @ http://json.org/


    tokenizer.consume("{"); // Needs to happen when the object starts.
    while (!tokenizer.tryConsume("}")) { // Continue till the object is done
      mergeField(tokenizer, extensionRegistry, builder);
    }
    // Test to make sure the tokenizer has reached the end of the stream.
    if (!tokenizer.atEnd()) {
      throw tokenizer.parseException("Expecting the end of the stream, but there seems to be more data!  Check the input for a valid JSON format.");
    }
  }




  /**
   * Parse a single field from {@code tokenizer} and merge it into {@code builder}. If a ',' is
   * detected after the field ends, the next field will be parsed automatically
   */
  protected void mergeField(Tokenizer tokenizer,
                            ExtensionRegistry extensionRegistry,
                            Message.Builder builder) throws ParseException {
    FieldDescriptor field;
    Descriptor type = builder.getDescriptorForType();
    ExtensionRegistry.ExtensionInfo extension = null;
    boolean unknown = false;


    String name = tokenizer.consumeIdentifier();
    field = type.findFieldByName(name);


    // Group names are expected to be capitalized as they appear in the
    // .proto file, which actually matches their type names, not their field
    // names.
    if (field == null) {
      // Explicitly specify US locale so that this code does not break when
      // executing in Turkey.
      String lowerName = name.toLowerCase(Locale.US);
      field = type.findFieldByName(lowerName);
      // If the case-insensitive match worked but the field is NOT a group,
      if ((field != null) && (field.getType() != FieldDescriptor.Type.GROUP)) {
        field = null;
      }
    }
    // Again, special-case group names as described above.
    if ((field != null) && (field.getType() == FieldDescriptor.Type.GROUP)
        && !field.getMessageType().getName().equals(name)) {
      field = null;
    }


    // Last try to lookup by field-index if 'name' is numeric,
    // which indicates a possible unknown field
    if (field == null && TextUtils.isDigits(name)) {
      field = type.findFieldByNumber(Integer.parseInt(name));
      unknown = true;
    }


    // Finally, look for extensions
    extension = extensionRegistry.findExtensionByName(name);
    if (extension != null) {
      if (extension.descriptor.getContainingType() != type) {
        throw tokenizer.parseExceptionPreviousToken("Extension \"" + name
            + "\" does not extend message type \""
            + type.getFullName() + "\".");
      }
      field = extension.descriptor;
    }


    // Disabled throwing exception if field not found, since it could be a different version.
    if (field == null) {
      handleMissingField(tokenizer, extensionRegistry, builder);
      //throw tokenizer.parseExceptionPreviousToken("Message type \"" + type.getFullName()
      //                                            + "\" has no field named \"" + name
      //                                            + "\".");
    }


    if (field != null) {
      tokenizer.consume(":");
      boolean array = tokenizer.tryConsume("[");


      if (array) {
        while (!tokenizer.tryConsume("]")) {
          handleValue(tokenizer, extensionRegistry, builder, field, extension, unknown);
          tokenizer.tryConsume(",");
        }
      } else {
        handleValue(tokenizer, extensionRegistry, builder, field, extension, unknown);
      }
    }


    if (tokenizer.tryConsume(",")) {
      // Continue with the next field
      mergeField(tokenizer, extensionRegistry, builder);
    }
  }


  private void handleMissingField(Tokenizer tokenizer,
                                  ExtensionRegistry extensionRegistry,
                                  Message.Builder builder) throws ParseException {
    tokenizer.tryConsume(":");
    if ("{".equals(tokenizer.currentToken())) {
      // Message structure
      tokenizer.consume("{");
      do {
        tokenizer.consumeIdentifier();
        handleMissingField(tokenizer, extensionRegistry, builder);
      } while (tokenizer.tryConsume(","));
      tokenizer.consume("}");
    } else if ("[".equals(tokenizer.currentToken())) {
      // Collection
      tokenizer.consume("[");
      do {
        handleMissingField(tokenizer, extensionRegistry, builder);
      } while (tokenizer.tryConsume(","));
      tokenizer.consume("]");
    } else { //if (!",".equals(tokenizer.currentToken)){
      // Primitive value
      if ("null".equals(tokenizer.currentToken())) {
        tokenizer.consume("null");
      } else if (tokenizer.lookingAtInteger()) {
        tokenizer.consumeInt64();
      } else if (tokenizer.lookingAtBoolean()) {
        tokenizer.consumeBoolean();
      } else {
        tokenizer.consumeString();
      }
    }
  }


  private void handleValue(Tokenizer tokenizer,
                           ExtensionRegistry extensionRegistry,
                           Message.Builder builder,
                           FieldDescriptor field,
                           ExtensionRegistry.ExtensionInfo extension,
                           boolean unknown) throws ParseException {


    Object value = null;
    if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
      value = handleObject(tokenizer, extensionRegistry, builder, field, extension, unknown);
    } else {
      value = handlePrimitive(tokenizer, field);
    }
    if (value != null) {
      if (field.isRepeated()) {
        builder.addRepeatedField(field, value);
      } else {
        builder.setField(field, value);
      }
    }
  }


  private Object handlePrimitive(Tokenizer tokenizer, FieldDescriptor field) throws ParseException {
    Object value = null;
    if ("null".equals(tokenizer.currentToken())) {
      tokenizer.consume("null");
      return value;
    }
    switch (field.getType()) {
      case INT32:
      case SINT32:
      case SFIXED32:
        value = tokenizer.consumeInt32();
        break;


      case INT64:
      case SINT64:
      case SFIXED64:
        value = tokenizer.consumeInt64();
        break;


      case UINT32:
      case FIXED32:
        value = tokenizer.consumeUInt32();
        break;


      case UINT64:
      case FIXED64:
        value = tokenizer.consumeUInt64();
        break;


      case FLOAT:
        value = tokenizer.consumeFloat();
        break;


      case DOUBLE:
        value = tokenizer.consumeDouble();
        break;


      case BOOL:
        value = tokenizer.consumeBoolean();
        break;


      case STRING:
        value = tokenizer.consumeString();
        break;


      case BYTES:
        value = tokenizer.consumeByteString();
        break;


      case ENUM: {
        EnumDescriptor enumType = field.getEnumType();


        if (tokenizer.lookingAtInteger()) {
          int number = tokenizer.consumeInt32();
          value = enumType.findValueByNumber(number);
          if (value == null) {
            throw tokenizer.parseExceptionPreviousToken("Enum type \""
                + enumType.getFullName()
                + "\" has no value with number "
                + number + ".");
          }
        } else {
          String id = tokenizer.consumeIdentifier();
          value = enumType.findValueByName(id);
          if (value == null) {
            throw tokenizer.parseExceptionPreviousToken("Enum type \""
                + enumType.getFullName()
                + "\" has no value named \""
                + id + "\".");
          }
        }


        break;
      }


      case MESSAGE:
      case GROUP:
        throw new RuntimeException("Can't get here.");
    }
    return value;
  }


  private Object handleObject(Tokenizer tokenizer,
                              ExtensionRegistry extensionRegistry,
                              Message.Builder builder,
                              FieldDescriptor field,
                              ExtensionRegistry.ExtensionInfo extension,
                              boolean unknown) throws ParseException {


    Message.Builder subBuilder;
    if (extension == null) {
      subBuilder = builder.newBuilderForField(field);
    } else {
      subBuilder = extension.defaultInstance.newBuilderForType();
    }


    if (unknown) {
      ByteString data = tokenizer.consumeByteString();
      try {
        subBuilder.mergeFrom(data);
        return subBuilder.build();
      } catch (InvalidProtocolBufferException e) {
        throw tokenizer.parseException("Failed to build " + field.getFullName() + " from " + data);
      }
    }


    tokenizer.consume("{");
    String endToken = "}";


    while (!tokenizer.tryConsume(endToken)) {
      if (tokenizer.atEnd()) {
        throw tokenizer.parseException("Expected \"" + endToken + "\".");
      }
      mergeField(tokenizer, extensionRegistry, subBuilder);
      if (tokenizer.tryConsume(",")) {
        // there are more fields in the object, so continue
        continue;
      }
    }


    return subBuilder.build();
  }


  // =================================================================
  // Utility functions
  //
  // Some of these methods are package-private because Descriptors.java uses
  // them.


  /**
   * Escapes bytes in the format used in protocol buffer text format, which is the same as the
   * format used for C string literals. All bytes that are not printable 7-bit ASCII characters
   * are escaped, as well as backslash, single-quote, and double-quote characters. Characters for
   * which no defined short-hand escape sequence is defined will be escaped using 3-digit octal
   * sequences.
   */
  static String escapeBytes(ByteString input) {
    StringBuilder builder = new StringBuilder(input.size());
    for (int i = 0; i < input.size(); i++) {
      byte b = input.byteAt(i);
      switch (b) {
        // Java does not recognize \a or \v, apparently.
        case 0x07:
          builder.append("\\a");
          break;
        case '\b':
          builder.append("\\b");
          break;
        case '\f':
          builder.append("\\f");
          break;
        case '\n':
          builder.append("\\n");
          break;
        case '\r':
          builder.append("\\r");
          break;
        case '\t':
          builder.append("\\t");
          break;
        case 0x0b:
          builder.append("\\v");
          break;
        case '\\':
          builder.append("\\\\");
          break;
        case '\'':
          builder.append("\\\'");
          break;
        case '"':
          builder.append("\\\"");
          break;
        default:
          if (b >= 0x20) {
            builder.append((char) b);
          } else {
            final String unicodeString = unicodeEscaped((char) b);
            builder.append(unicodeString);
          }
          break;
      }
    }
    return builder.toString();
  }


  static String unicodeEscaped(char ch) {
    if (ch < 0x10) {
      return "\\u000" + Integer.toHexString(ch);
    } else if (ch < 0x100) {
      return "\\u00" + Integer.toHexString(ch);
    } else if (ch < 0x1000) {
      return "\\u0" + Integer.toHexString(ch);
    }
    return "\\u" + Integer.toHexString(ch);
  }


  /**
   * Un-escape a byte sequence as escaped using
   * {@link #escapeBytes(com.googlecode.protobuf.format.ByteString)}. Two-digit hex escapes (starting with
   * "\x") are also recognized.
   */
  static ByteString unescapeBytes(CharSequence input) throws InvalidEscapeSequence {
    byte[] result = new byte[input.length()];
    int pos = 0;
    for (int i = 0; i < input.length(); i++) {
      char c = input.charAt(i);
      if (c == '\\') {
        if (i + 1 < input.length()) {
          ++i;
          c = input.charAt(i);
          if (TextUtils.isOctal(c)) {
            // Octal escape.
            int code = TextUtils.digitValue(c);
            if ((i + 1 < input.length()) && TextUtils.isOctal(input.charAt(i + 1))) {
              ++i;
              code = code * 8 + TextUtils.digitValue(input.charAt(i));
            }
            if ((i + 1 < input.length()) && TextUtils.isOctal(input.charAt(i + 1))) {
              ++i;
              code = code * 8 + TextUtils.digitValue(input.charAt(i));
            }
            result[pos++] = (byte) code;
          } else {
            switch (c) {
              case 'a':
                result[pos++] = 0x07;
                break;
              case 'b':
                result[pos++] = '\b';
                break;
              case 'f':
                result[pos++] = '\f';
                break;
              case 'n':
                result[pos++] = '\n';
                break;
              case 'r':
                result[pos++] = '\r';
                break;
              case 't':
                result[pos++] = '\t';
                break;
              case 'v':
                result[pos++] = 0x0b;
                break;
              case '\\':
                result[pos++] = '\\';
                break;
              case '\'':
                result[pos++] = '\'';
                break;
              case '"':
                result[pos++] = '\"';
                break;


              case 'x':
                // hex escape
                int code = 0;
                if ((i + 1 < input.length()) && TextUtils.isHex(input.charAt(i + 1))) {
                  ++i;
                  code = TextUtils.digitValue(input.charAt(i));
                } else {
                  throw new InvalidEscapeSequence("Invalid escape sequence: '\\x' with no digits");
                }
                if ((i + 1 < input.length()) && TextUtils.isHex(input.charAt(i + 1))) {
                  ++i;
                  code = code * 16 + TextUtils.digitValue(input.charAt(i));
                }
                result[pos++] = (byte) code;
                break;
              case 'u':
                // UTF8 escape
                code = (16 * 3 * TextUtils.digitValue(input.charAt(i+1))) +
                    (16 * 2 * TextUtils.digitValue(input.charAt(i+2))) +
                    (16 * TextUtils.digitValue(input.charAt(i+3))) +
                    TextUtils.digitValue(input.charAt(i+4));
                i = i+4;
                result[pos++] = (byte) code;
                break;


              default:
                throw new InvalidEscapeSequence("Invalid escape sequence: '\\" + c
                    + "'");
            }
          }
        } else {
          throw new InvalidEscapeSequence("Invalid escape sequence: '\\' at end of string.");
        }
      } else {
        result[pos++] = (byte) c;
      }
    }


    return ByteString.copyFrom(result, 0, pos);
  }


  /**
   * Thrown by {@link ProtobufJsonFormat#unescapeBytes} and {@link ProtobufJsonFormat#unescapeText} when an
   * invalid escape sequence is seen.
   */
  static class InvalidEscapeSequence extends IOException {


    private static final long serialVersionUID = 1L;


    public InvalidEscapeSequence(String description) {
      super(description);
    }
  }


  /**
   * Implements JSON string escaping as specified <a href="http://www.ietf.org/rfc/rfc4627.txt">here</a>.
   * <ul>
   *  <li>The following characters are escaped by prefixing them with a '\' : \b,\f,\n,\r,\t,\,"</li>
   *  <li>Other control characters in the range 0x0000-0x001F are escaped using the \\uXXXX notation</li>
   *  <li>UTF-16 surrogate pairs are encoded using the \\uXXXX\\uXXXX notation</li>
   *  <li>any other character is printed as-is</li>
   * </ul>
   */
  static String escapeText(String input) {
    StringBuilder builder = new StringBuilder(input.length());
    CharacterIterator iter = new StringCharacterIterator(input);
    for(char c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) {
      switch(c) {
        case '\b':
          builder.append("\\b");
          break;
        case '\f':
          builder.append("\\f");
          break;
        case '\n':
          builder.append("\\n");
          break;
        case '\r':
          builder.append("\\r");
          break;
        case '\t':
          builder.append("\\t");
          break;
        case '\\':
          builder.append("\\\\");
          break;
        case '"':
          builder.append("\\\"");
          break;
        default:
          // Check for other control characters
          if(c >= 0x0000 && c <= 0x001F) {
            appendEscapedUnicode(builder, c);
          } else if(Character.isHighSurrogate(c)) {
            // Encode the surrogate pair using 2 six-character sequence (\\uXXXX\\uXXXX)
            appendEscapedUnicode(builder, c);
            c = iter.next();
            if(c == CharacterIterator.DONE) throw new IllegalArgumentException("invalid unicode string: unexpected high surrogate pair value without corresponding low value.");
            appendEscapedUnicode(builder, c);
          } else {
            // Anything else can be printed as-is
            builder.append(c);
          }
          break;
      }
    }
    return builder.toString();
  }


  static void appendEscapedUnicode(StringBuilder builder, char ch) {
    String prefix = "\\u";
    if(ch < 0x10) {
      prefix = "\\u000";
    } else if(ch < 0x100) {
      prefix = "\\u00";
    } else if(ch < 0x1000) {
      prefix = "\\u0";
    }
    builder.append(prefix).append(Integer.toHexString(ch));
  }


  /**
   * Un-escape a text string as escaped using {@link #escapeText(String)}.
   */
  static String unescapeText(String input) throws InvalidEscapeSequence {
    StringBuilder builder = new StringBuilder();
    char[] array = input.toCharArray();
    for(int i = 0; i < array.length; i++) {
      char c = array[i];
      if(c == '\\') {
        if(i + 1 < array.length) {
          ++i;
          c = array[i];
          switch(c) {
            case 'b':
              builder.append('\b');
              break;
            case 'f':
              builder.append('\f');
              break;
            case 'n':
              builder.append('\n');
              break;
            case 'r':
              builder.append('\r');
              break;
            case 't':
              builder.append('\t');
              break;
            case '\\':
              builder.append('\\');
              break;
            case '"':
              builder.append('\"');
              break;
            case '\'':
              builder.append('\'');
              break;
            case 'u':
              // read the next 4 chars
              if(i + 4 < array.length) {
                ++i;
                int code = Integer.parseInt(new String(array, i, 4), 16);
                // this cast is safe because we know how many chars we read
                builder.append((char)code);
                i += 3;
              } else {
                throw new InvalidEscapeSequence("Invalid escape sequence: '\\u' at end of string.");
              }
              break;
            default:
              throw new InvalidEscapeSequence("Invalid escape sequence: '\\" + c + "'");
          }
        } else {
          throw new InvalidEscapeSequence("Invalid escape sequence: '\\' at end of string.");
        }
      } else {
        builder.append(c);
      }
    }


    return builder.toString();
  }




  /**
   * Parse a 32-bit signed integer from the text. Unlike the Java standard {@code
   * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify
   * hexidecimal and octal numbers, respectively.
   */
  static int parseInt32(String text) throws NumberFormatException {
    return (int) parseInteger(text, true, false);
  }


  /**
   * Parse a 32-bit unsigned integer from the text. Unlike the Java standard {@code
   * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify
   * hexidecimal and octal numbers, respectively. The result is coerced to a (signed) {@code int}
   * when returned since Java has no unsigned integer type.
   */
  static int parseUInt32(String text) throws NumberFormatException {
    return (int) parseInteger(text, false, false);
  }


  /**
   * Parse a 64-bit signed integer from the text. Unlike the Java standard {@code
   * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify
   * hexidecimal and octal numbers, respectively.
   */
  static long parseInt64(String text) throws NumberFormatException {
    return parseInteger(text, true, true);
  }


  /**
   * Parse a 64-bit unsigned integer from the text. Unlike the Java standard {@code
   * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify
   * hexidecimal and octal numbers, respectively. The result is coerced to a (signed) {@code long}
   * when returned since Java has no unsigned long type.
   */
  static long parseUInt64(String text) throws NumberFormatException {
    return parseInteger(text, false, true);
  }


  private static long parseInteger(String text, boolean isSigned, boolean isLong) throws NumberFormatException {
    int pos = 0;


    boolean negative = false;
    if (text.startsWith("-", pos)) {
      if (!isSigned) {
        throw new NumberFormatException("Number must be positive: " + text);
      }
      ++pos;
      negative = true;
    }


    int radix = 10;
    if (text.startsWith("0x", pos)) {
      pos += 2;
      radix = 16;
    } else if (text.startsWith("0", pos)) {
      radix = 8;
    }


    String numberText = text.substring(pos);


    long result = 0;
    if (numberText.length() < 16) {
      // Can safely assume no overflow.
      result = Long.parseLong(numberText, radix);
      if (negative) {
        result = -result;
      }


      // Check bounds.
      // No need to check for 64-bit numbers since they'd have to be 16 chars
      // or longer to overflow.
      if (!isLong) {
        if (isSigned) {
          if ((result > Integer.MAX_VALUE) || (result < Integer.MIN_VALUE)) {
            throw new NumberFormatException("Number out of range for 32-bit signed integer: "
                + text);
          }
        } else {
          if ((result >= (1L << 32)) || (result < 0)) {
            throw new NumberFormatException("Number out of range for 32-bit unsigned integer: "
                + text);
          }
        }
      }
    } else {
      BigInteger bigValue = new BigInteger(numberText, radix);
      if (negative) {
        bigValue = bigValue.negate();
      }


      // Check bounds.
      if (!isLong) {
        if (isSigned) {
          if (bigValue.bitLength() > 31) {
            throw new NumberFormatException("Number out of range for 32-bit signed integer: "
                + text);
          }
        } else {
          if (bigValue.bitLength() > 32) {
            throw new NumberFormatException("Number out of range for 32-bit unsigned integer: "
                + text);
          }
        }
      } else {
        if (isSigned) {
          if (bigValue.bitLength() > 63) {
            throw new NumberFormatException("Number out of range for 64-bit signed integer: "
                + text);
          }
        } else {
          if (bigValue.bitLength() > 64) {
            throw new NumberFormatException("Number out of range for 64-bit unsigned integer: "
                + text);
          }
        }
      }


      result = bigValue.longValue();
    }


    return result;
  }
}
Source Code of org.apache.tajo.datum.protobuf.ProtobufJsonFormat

Related Classes of org.apache.tajo.datum.protobuf.ProtobufJsonFormat