Package com.google.enterprise.connector.db.diffing

Source Code of com.google.enterprise.connector.db.diffing.JsonDocument

// Copyright 2011 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.enterprise.connector.db.diffing;

import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.enterprise.connector.db.InputStreamFactories;
import com.google.enterprise.connector.spi.Document;
import com.google.enterprise.connector.spi.Property;
import com.google.enterprise.connector.spi.RepositoryException;
import com.google.enterprise.connector.spi.SimpleProperty;
import com.google.enterprise.connector.spi.SkippedDocumentException;
import com.google.enterprise.connector.spi.SpiConstants;
import com.google.enterprise.connector.spi.TraversalContext;
import com.google.enterprise.connector.spi.Value;
import com.google.enterprise.connector.util.InputStreamFactory;
import com.google.enterprise.connector.util.diffing.SnapshotRepositoryRuntimeException;

import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONWriter;

import java.io.IOException;
import java.io.StringWriter;
import java.text.ParseException;
import java.util.Calendar;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
* A simple {@link Document} implementation created from a {@link JSONObject}.
*/
public class JsonDocument implements Document {
  private static final Logger LOG =
      Logger.getLogger(JsonDocument.class.getName());

  private final JSONObject jsonObject;
  private final String objectId;
  private final Map<String, List<Value>> properties;
  private static TraversalContext traversalContext;

  public static void setTraversalContext(TraversalContext traversalContext) {
    JsonDocument.traversalContext = traversalContext;
  }

  /**
   * Constructor used by {@link DBHandle} when deserializing a
   * {@code DocumentHandle} from the recovery file.
   */
  public JsonDocument(JSONObject jsonObject) {
    this(buildJsonProperties(jsonObject), jsonObject);
  }

  /**
   * Constructor used by the {@link DocumentBuilder} for creating a
   * {@link JsonDocument} object used by {@link RepositoryHandler}
   * for building a collection over JsonDocument.
   */
  public JsonDocument(Map<String, List<Value>> properties,
                      JSONObject jsonObject) {
    this.properties = properties;
    this.jsonObject = jsonObject;
    objectId = getSingleValueString(SpiConstants.PROPNAME_DOCID);
    if (Strings.isNullOrEmpty(objectId)) {
      throw new IllegalArgumentException(
          "Unable to parse for docID from the properties:" + properties);
    }
  }

  public String getDocumentId() {
    return objectId;
  }

  public String toJson() {
    // JSON does not support custom serialization, so we have to find
    // the InputStreamFactory for the content and serialize it
    // ourselves. This could be cleaner if we supported toString on the
    // InputStreamFactory implementations, but that would mean less
    // control over when the LOB was materialized in memory.
    try {
      StringWriter buffer = new StringWriter();
      JSONWriter writer = new JSONWriter(buffer);
      writer.object();
      for (String name : JSONObject.getNames(jsonObject)) {
        writer.key(name).value(toJson(name, jsonObject.get(name)));
      }
      writer.endObject();
      return buffer.toString();
    } catch (IOException e) {
      throw new SnapshotRepositoryRuntimeException(
          "Error serializing document " + objectId, e);
    } catch (JSONException e) {
      throw new SnapshotRepositoryRuntimeException(
          "Error serializing document " + objectId, e);
    }
  }

  /**
   * Serializes the given value. This always returns the value itself,
   * unless it is an {@code InputStreamFactory} for the google:content
   * property, in which case we Base64 encode it.
   */
  private Object toJson(String name, Object input) throws IOException {
    if (name.equals(SpiConstants.PROPNAME_CONTENT)) {
      if (input instanceof String) {
        return input;
      } else if (input instanceof InputStreamFactory) {
        return InputStreamFactories.toBase64String((InputStreamFactory) input);
      } else {
        LOG.warning("Unexpected content object class: "
            + input.getClass().getName());
        return input;
      }
    } else {
      return input;
    }
  }

  /**
   * A class level method for extracting attributes from JSONObject object and
   * creating a {@code Map<String,List<Value>>} used by the superclass({@link
   * SimpleDocument}) constructor and hence creating a JsonDocument Object.
   */
  private static Map<String, List<Value>> buildJsonProperties(JSONObject jo) {
    ImmutableMap.Builder<String, List<Value>> mapBuilder =
        ImmutableMap.builder();
    @SuppressWarnings("unchecked") Iterator<String> jsonKeys = jo.keys();
    while (jsonKeys.hasNext()) {
      String key = jsonKeys.next();
      if (key.equals(SpiConstants.PROPNAME_DOCID)) {
        extractDocid(jo, mapBuilder);
      } else if (key.equals(SpiConstants.PROPNAME_CONTENT)) {
        extractContent(jo, mapBuilder);
      } else if (key.equals(SpiConstants.PROPNAME_LASTMODIFIED)) {
        extractLastModified(jo, mapBuilder);
      } else {
        extractAttribute(jo, mapBuilder, key);
      }
    }
    return mapBuilder.build();
  }

  /**
   * Copies a string-valued attribute from a JSONObject to a map of SPI
   * Value objects.
   */
  private static void extractAttribute(JSONObject jo,
      ImmutableMap.Builder<String, List<Value>> mapBuilder, String key) {
    try {
      if (!jo.isNull(key)) {
        mapBuilder.put(key,
            ImmutableList.of(Value.getStringValue(jo.getString(key))));
      }
    } catch (JSONException e) {
      LOG.log(Level.WARNING,
          "Exception thrown while extracting key: " + key, e);
    }
  }

  /**
   * Copies a required docid attribute from a JSONObject to a map of
   * SPI Value objects.
   */
  private static void extractDocid(JSONObject jo,
      ImmutableMap.Builder<String, List<Value>> mapBuilder) {
    String docid;
    try {
      docid = jo.getString(SpiConstants.PROPNAME_DOCID);
      Preconditions.checkState(!Strings.isNullOrEmpty(docid));
    } catch (Exception e) {
      throw new IllegalArgumentException(
          "Internal consistency error: missing docid", e);
    }
    mapBuilder.put(SpiConstants.PROPNAME_DOCID,
                   ImmutableList.of(Value.getStringValue(docid)));
  }

  /**
   * Extracts Base64-encoded google:content values and puts the
   * decoded value into an InputStreamFactory that minimizes memory
   * usage. If the google:content value is not Base64-encoded, it is
   * converted to bytes using UTF-8. The value in the JSONObject is
   * also replaced by the new value to save memory.
   */
  private static void extractContent(JSONObject jo,
      ImmutableMap.Builder<String, List<Value>> mapBuilder) {
    try {
      String content = jo.getString(SpiConstants.PROPNAME_CONTENT);
      List<Value> values;
      if (Strings.isNullOrEmpty(content)) {
        values = null;
      } else {
        InputStreamFactory factory =
            InputStreamFactories.fromBase64String(content);
        jo.put(SpiConstants.PROPNAME_CONTENT, factory);
        values = ImmutableList.of(Value.getBinaryValue(factory));
      }
      mapBuilder.put(SpiConstants.PROPNAME_CONTENT, values);
    } catch (JSONException e) {
      LOG.log(Level.WARNING, "Exception thrown while extracting content.", e);
    }
  }

  /**
   * Copies the last modified date attribute from a JSONObject to a
   * map of SPI Value objects.
   */
  private static void extractLastModified(JSONObject jo,
      ImmutableMap.Builder<String, List<Value>> mapBuilder) {
    try {
      String lastModified = jo.getString(SpiConstants.PROPNAME_LASTMODIFIED);

      try {
        if (!Strings.isNullOrEmpty(lastModified)) {
          Calendar cal = Value.iso8601ToCalendar(lastModified);
          mapBuilder.put(SpiConstants.PROPNAME_LASTMODIFIED,
              ImmutableList.of(Value.getDateValue(cal)));
        }
      } catch (ParseException e) {
        LOG.log(Level.WARNING,
            "Exception thrown while handling date: " + lastModified, e);
      }
    } catch (JSONException e) {
      LOG.log(Level.WARNING,
          "Exception thrown while extracting last modifed date.", e);
    }
  }

  @Override
  public Set<String> getPropertyNames() {
    return properties.keySet();
  }

  @Override
  public Property findProperty(String name) throws RepositoryException {
    List<Value> property = properties.get(name);
    if (name.equals(SpiConstants.PROPNAME_CONTENT) && filterMimeType()) {
        property = null;
    }
    return (property == null) ? null : new SimpleProperty(property);
  }

  /**
   * Filter the Document or just its Content based upon its MIME type.
   *
   * @return true if content should be skipped based upon its MIME type,
   *         false otherwise.
   * @throws SkippedDocumentException if this document is to be ignored
   *         based upon its MIME type.
   */
  private boolean filterMimeType() throws RepositoryException {
    String mimeType = getSingleValueString(SpiConstants.PROPNAME_MIMETYPE);
    if (mimeType != null && traversalContext != null) {
      int mimeTypeSupportLevel =
          traversalContext.mimeTypeSupportLevel(mimeType);
      if (mimeTypeSupportLevel == 0) {
        LOG.warning("Skipping the contents with docId: " + objectId
            + " as content MIME type " + mimeType + " is not supported.");
        return true;
      } else if (mimeTypeSupportLevel < 0) {
        String msg = new StringBuilder("Skipping the document with docId: ")
            .append(objectId).append(" as the MIME type ").append(mimeType)
            .append(" is in the 'ignored' MIME types list.").toString();
        LOG.warning(msg);
        throw new SkippedDocumentException(msg);
      }
    }
    return false;
  }

  /** Returns the first value of the named property as a String. */
  private String getSingleValueString(String name) {
    List<Value> values = properties.get(name);
    if (values != null) {
      Value value = values.iterator().next();
      if (value != null) {
        return value.toString();
      }
    }
    return null;
  }
}
TOP

Related Classes of com.google.enterprise.connector.db.diffing.JsonDocument

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.