Package it.unimi.dsi.mg4j.document

Source Code of it.unimi.dsi.mg4j.document.StringArrayDocumentCollection

package it.unimi.dsi.mg4j.document;

import it.unimi.dsi.fastutil.io.FastByteArrayInputStream;
import it.unimi.dsi.fastutil.objects.Reference2ObjectArrayMap;
import it.unimi.dsi.fastutil.objects.Reference2ObjectMap;

import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;

import org.apache.commons.configuration.ConfigurationException;

/** A document collection explicitly defined by a sequence of strings (mainly useful for testing).
*
* <p>Every string in the provided sequence is considered a document,
* and the stream returned by the collection is the UTF-8 encoding of the string. By default,
* the collection uses an {@link IdentityDocumentFactory}, but you can also
* {@linkplain #StringArrayDocumentCollection(DocumentFactory, String[]) specify your own factory}.
*/

public class StringArrayDocumentCollection extends AbstractDocumentCollection implements Serializable {
  private static final long serialVersionUID = 1L;
  final public String[] document;
  final DocumentFactory factory;
  private final boolean uris;

  public StringArrayDocumentCollection( final String... document ) throws ConfigurationException {
    this( false, document );
  }

  public StringArrayDocumentCollection( final DocumentFactory factory, final String... document ) {
    this( false, factory, document );
  }

  public StringArrayDocumentCollection( boolean uris, final String... document ) throws ConfigurationException {
    this( uris, new IdentityDocumentFactory( new String[] { "encoding=UTF-8" } ), document );
  }

  public StringArrayDocumentCollection( boolean uris, final DocumentFactory factory, final String... document ) {
    this.uris = uris;
    this.factory = factory;
    this.document = document;
  }

  public int size() {
    return document.length;
  }

  public Document document( final int index ) throws IOException {
    return factory.getDocument( stream( index ), metadata( index ) );
  }

  public InputStream stream( final int index ) throws IOException {
    return new FastByteArrayInputStream( document[ index ].getBytes( "UTF-8" ) );
  }

  public Reference2ObjectMap<Enum<?>,Object> metadata( int index ) throws IOException {
    final Reference2ObjectArrayMap<Enum<?>, Object> metadata = new Reference2ObjectArrayMap<Enum<?>,Object>(1);
    metadata.put( PropertyBasedDocumentFactory.MetadataKeys.TITLE, "Document " + index );
    if ( uris ) metadata.put( PropertyBasedDocumentFactory.MetadataKeys.URI, "doc:" + index );
    return metadata;
  }

  public DocumentCollection copy() {
    return this;
  }

  public DocumentFactory factory() {
    return factory;
  };
}
TOP

Related Classes of it.unimi.dsi.mg4j.document.StringArrayDocumentCollection

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.