Package org.apache.solr.update

Source Code of org.apache.solr.update.DocumentBuilder

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.solr.update;

import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.schema.*;

/**
* @version $Id: DocumentBuilder.java 1202510 2011-11-16 02:18:14Z hossman $
*/


// Not thread safe - by design.  Create a new builder for each thread.
public class DocumentBuilder {
  private final IndexSchema schema;
  private Document doc;
  private HashMap<String,String> map;

  public DocumentBuilder(IndexSchema schema) {
    this.schema = schema;
  }

  public void startDoc() {
    doc = new Document();
    map = new HashMap<String,String>();
  }

  protected void addSingleField(SchemaField sfield, String val, float boost) {
    //System.out.println("###################ADDING FIELD "+sfield+"="+val);

    // we don't check for a null val ourselves because a solr.FieldType
    // might actually want to map it to something.  If createField()
    // returns null, then we don't store the field.
    if (sfield.isPolyField()) {
      Fieldable[] fields = sfield.createFields(val, boost);
      if (fields.length > 0) {
        if (!sfield.multiValued()) {
          String oldValue = map.put(sfield.getName(), val);
          if (oldValue != null) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "ERROR: multiple values encountered for non multiValued field " + sfield.getName()
                    + ": first='" + oldValue + "' second='" + val + "'");
          }
        }
        // Add each field
        for (Fieldable field : fields) {
          doc.add(field);
        }
      }
    } else {
      Fieldable field = sfield.createField(val, boost);
      if (field != null) {
        if (!sfield.multiValued()) {
          String oldValue = map.put(sfield.getName(), val);
          if (oldValue != null) {
            throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"ERROR: multiple values encountered for non multiValued field " + sfield.getName()
                    + ": first='" + oldValue + "' second='" + val + "'");
          }
        }
      }
      doc.add(field);
    }

  }

  /**
   * Add the specified {@link org.apache.solr.schema.SchemaField} to the document.  Does not invoke the copyField mechanism.
   * @param sfield The {@link org.apache.solr.schema.SchemaField} to add
   * @param val The value to add
   * @param boost The boost factor
   *
   * @see #addField(String, String)
   * @see #addField(String, String, float)
   * @see #addSingleField(org.apache.solr.schema.SchemaField, String, float)
   */
  public void addField(SchemaField sfield, String val, float boost) {
    addSingleField(sfield,val,boost);
  }

  /**
   * Add the Field and value to the document, invoking the copyField mechanism
   * @param name The name of the field
   * @param val The value to add
   *
   * @see #addField(String, String, float)
   * @see #addField(org.apache.solr.schema.SchemaField, String, float)
   * @see #addSingleField(org.apache.solr.schema.SchemaField, String, float)
   */
  public void addField(String name, String val) {
    addField(name, val, 1.0f);
  }

  /**
   * Add the Field and value to the document with the specified boost, invoking the copyField mechanism
   * @param name The name of the field.
   * @param val The value to add
   * @param boost The boost
   *
   * @see #addField(String, String)
   * @see #addField(org.apache.solr.schema.SchemaField, String, float)
   * @see #addSingleField(org.apache.solr.schema.SchemaField, String, float)
   *
   */
  public void addField(String name, String val, float boost) {
    SchemaField sfield = schema.getFieldOrNull(name);
    if (sfield != null) {
      addField(sfield,val,boost);
    }

    // Check if we should copy this field to any other fields.
    // This could happen whether it is explicit or not.
    final List<CopyField> copyFields = schema.getCopyFieldsList(name);
    if (copyFields != null) {
      for(CopyField cf : copyFields) {
        addSingleField(cf.getDestination(), cf.getLimitedValue( val ), boost);
      }
    }

    // error if this field name doesn't match anything
    if (sfield==null && (copyFields==null || copyFields.size()==0)) {
      throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"ERROR:unknown field '" + name + "'");
    }
  }

  public void setBoost(float boost) {
    doc.setBoost(boost);
  }

  public void endDoc() {
  }

  // specific to this type of document builder
  public Document getDoc() throws IllegalArgumentException {
   
    // Check for all required fields -- Note, all fields with a
    // default value are defacto 'required' fields. 
    List<String> missingFields = null;
    for (SchemaField field : schema.getRequiredFields()) {
      if (doc.getFieldable(field.getName() ) == null) {
        if (field.getDefaultValue() != null) {
          addField(doc, field, field.getDefaultValue(), 1.0f);
        } else {
          if (missingFields==null) {
            missingFields = new ArrayList<String>(1);
          }
          missingFields.add(field.getName());
        }
      }
    }
 
    if (missingFields != null) {
      StringBuilder builder = new StringBuilder();
      // add the uniqueKey if possible
      if( schema.getUniqueKeyField() != null ) {
        String n = schema.getUniqueKeyField().getName();
        String v = doc.get( n );
        builder.append( "Document ["+n+"="+v+"] " );
      }
      builder.append("missing required fields: " );
      for (String field : missingFields) {
        builder.append(field);
        builder.append(" ");
      }
      throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, builder.toString());
    }
   
    Document ret = doc; doc=null;
    return ret;
  }


  private static void addField(Document doc, SchemaField field, String val, float boost) {
    if (field.isPolyField()) {
      Fieldable[] farr = field.getType().createFields(field, val, boost);
      for (Fieldable f : farr) {
        if (f != null) doc.add(f); // null fields are not added
      }
    } else {
      Fieldable f = field.createField(val, boost);
      if (f != null) doc.add(f)// null fields are not added
    }
  }
 
  private static String getID( SolrInputDocument doc, IndexSchema schema )
  {
    String id = "";
    SchemaField sf = schema.getUniqueKeyField();
    if( sf != null ) {
      id = "[doc="+doc.getFieldValue( sf.getName() )+"] ";
    }
    return id;
  }

  /**
   * Convert a SolrInputDocument to a lucene Document.
   *
   * This function should go elsewhere.  This builds the Document without an
   * extra Map<> checking for multiple values.  For more discussion, see:
   * http://www.nabble.com/Re%3A-svn-commit%3A-r547493---in--lucene-solr-trunk%3A-.--src-java-org-apache-solr-common--src-java-org-apache-solr-schema--src-java-org-apache-solr-update--src-test-org-apache-solr-common--tf3931539.html
   *
   * TODO: /!\ NOTE /!\ This semantics of this function are still in flux. 
   * Something somewhere needs to be able to fill up a SolrDocument from
   * a lucene document - this is one place that may happen.  It may also be
   * moved to an independent function
   *
   * @since solr 1.3
   */
  public static Document toDocument( SolrInputDocument doc, IndexSchema schema )
  {
    Document out = new Document();
    out.setBoost( doc.getDocumentBoost() );
   
    // Load fields from SolrDocument to Document
    for( SolrInputField field : doc ) {
      String name = field.getName();
      SchemaField sfield = schema.getFieldOrNull(name);
      boolean used = false;
      float boost = field.getBoost();
     
      // Make sure it has the correct number
      if( sfield!=null && !sfield.multiValued() && field.getValueCount() > 1 ) {
        throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
            "ERROR: "+getID(doc, schema)+"multiple values encountered for non multiValued field " +
              sfield.getName() + ": " +field.getValue() );
      }
     

      // load each field value
      boolean hasField = false;
      try {
        for( Object v : field ) {
          if( v == null ) {
            continue;
          }
          String val = null;
          hasField = true;
          boolean isBinaryField = false;
          if (sfield != null && sfield.getType() instanceof BinaryField) {
            isBinaryField = true;
            BinaryField binaryField = (BinaryField) sfield.getType();
            Fieldable f = binaryField.createField(sfield,v,boost);
            if(f != null){
              out.add(f);
            }
            used = true;
          } else {
            // TODO!!! HACK -- date conversion
            if (sfield != null && v instanceof Date && sfield.getType() instanceof DateField) {
              DateField df = (DateField) sfield.getType();
              val = df.toInternal((Date) v) + 'Z';
            } else if (v != null) {
              val = v.toString();
            }
 
            if (sfield != null) {
              used = true;
              addField(out, sfield, val, boost);
            }
          }
 
          // Check if we should copy this field to any other fields.
          // This could happen whether it is explicit or not.
          List<CopyField> copyFields = schema.getCopyFieldsList(name);
          for (CopyField cf : copyFields) {
            SchemaField destinationField = cf.getDestination();
            // check if the copy field is a multivalued or not
            if (!destinationField.multiValued() && out.get(destinationField.getName()) != null) {
              throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                      "ERROR: "+getID(doc, schema)+"multiple values encountered for non multiValued copy field " +
                              destinationField.getName() + ": " + val);
            }
 
            used = true;
            //Don't worry about poly fields here
            Fieldable [] fields = null;
            if (isBinaryField) {
              if (destinationField.getType() instanceof BinaryField) {
                BinaryField binaryField = (BinaryField) destinationField.getType();
                //TODO: safe to assume that binary fields only create one?
                fields = new Fieldable[]{binaryField.createField(destinationField, v, boost)};
              }
            } else {
              fields = destinationField.createFields(cf.getLimitedValue(val), boost);
            }
            if (fields != null) { // null fields are not added
              for (Fieldable f : fields) {
                out.add(f);
              }
            }
          }
         
          // In lucene, the boost for a given field is the product of the
          // document boost and *all* boosts on values of that field.
          // For multi-valued fields, we only want to set the boost on the
          // first field.
          boost = 1.0f;
        }
      }
      catch( Exception ex ) {
        throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
            "ERROR: "+getID(doc, schema)+"Error adding field '" +
              field.getName() + "'='" +field.getValue()+"'", ex );
      }
     
      // make sure the field was used somehow...
      if( !used && hasField ) {
        throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
            "ERROR: "+getID(doc, schema)+"unknown field '" +name + "'");
      }
    }
   
       
    // Now validate required fields or add default values
    // fields with default values are defacto 'required'
    for (SchemaField field : schema.getRequiredFields()) {
      if (out.getFieldable(field.getName() ) == null) {
        if (field.getDefaultValue() != null) {
          addField(out, field, field.getDefaultValue(), 1.0f);
        }
        else {
//          String msg = getID(doc, schema) + "missing required field: " + field.getName();
//          throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, msg );
        }
      }
    }
    return out;
  }

 
  /**
   * Add fields from the solr document
   *
   * TODO: /!\ NOTE /!\ This semantics of this function are still in flux. 
   * Something somewhere needs to be able to fill up a SolrDocument from
   * a lucene document - this is one place that may happen.  It may also be
   * moved to an independent function
   *
   * @since solr 1.3
   */
  public SolrDocument loadStoredFields( SolrDocument doc, Document luceneDoc  )
  {
    for( Object f : luceneDoc.getFields() ) {
      Fieldable field = (Fieldable)f;
      if( field.isStored() ) {
        SchemaField sf = schema.getField( field.name() );
        if( !schema.isCopyFieldTarget( sf ) ) {
          doc.addField( field.name(), sf.getType().toObject( field ) );
        }
      }
    }
    return doc;
  }
}
TOP

Related Classes of org.apache.solr.update.DocumentBuilder

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.