Package edu.ucla.sspace.text

Source Code of edu.ucla.sspace.text.BufferedFileListDocumentIterator$Bufferer

/*
* Copyright 2009 David Jurgens
*
* This file is part of the S-Space package and is covered under the terms and
* conditions therein.
*
* The S-Space package is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation and distributed hereunder to you.
*
* THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
* EXPRESS OR IMPLIED ARE MADE.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
* NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
* PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
* WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
* RIGHTS.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package edu.ucla.sspace.text;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.IOError;

import java.util.ArrayDeque;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Queue;

import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;

import edu.ucla.sspace.util.LineReader;


/**
* An iterator implementation that returns {@link Document} instances given a
* file that contains list of files, buffering their contents as necessary.
*
* <p>
*
* This class is thread-safe.
*/
public class BufferedFileListDocumentIterator implements Iterator<Document> {

    /**
     * The default maximum number of elements to have queued
     */
    private static final int DEFAULT_BUFFER_SIZE = 100;

    /**
     * The files in the list that have yet to be returned as {@code Document}
     * instances
     */
    private final Queue<String> filesToProcess;

    private final BlockingQueue<Document> documentsToReturn;

    private final AtomicInteger remaining;

    private volatile RuntimeException bufferError;

    /**
     * Creates an {@code Iterator} over the files listed in the provided file
     * using the default buffer size.
     *
     * @code fileListName a file containing a list of file names with one per
     *       line
     *
     * @throws IOException if any error occurs when reading {@code fileListName}
     */
    public BufferedFileListDocumentIterator(String fileListName)
            throws IOException {
        this(fileListName, DEFAULT_BUFFER_SIZE);
    }
   
    /**
     * Creates an {@code Iterator} over the files listed in the provided file.
     *
     * @code fileListName a file containing a list of file names with one per
     *       line
     *
     * @throws IOException if any error occurs when reading {@code fileListName}
     */
    public BufferedFileListDocumentIterator(String fileListName, int bufferSize)
            throws IOException {
 
  filesToProcess = new ArrayDeque<String>();
        documentsToReturn = new ArrayBlockingQueue<Document>(bufferSize);

  // read in all the files we have to process
        for (String line : new LineReader(new File(fileListName)))
      filesToProcess.offer(line.trim());     

        remaining = new AtomicInteger(filesToProcess.size());
        bufferError = null;

        Thread bufferingThread = new Thread(new Bufferer(),
            "BufferingThread for " + fileListName);
        bufferingThread.setDaemon(true);
        bufferingThread.start();
    }

    /**
     * Returns {@code true} if there are more documents to return.
     */
    public boolean hasNext() {
  return remaining.get() > 0;
    }
   
    /**
     * Returns the next document from the list.
     */
    public Document next() {
        if (!hasNext())
            throw new NoSuchElementException("No further documents");
        if (bufferError != null)
            throw bufferError;
        try {
            return documentsToReturn.take();
        } catch (InterruptedException ie) {
            throw new IOError(ie);
        }
    } 
   
    /**
     * Throws an {@link UnsupportedOperationException} if called.
     */
    public void remove() {
  throw new UnsupportedOperationException(
      "removing documents is not supported");
    }

    class Bufferer implements Runnable {
        public void run() {
            while (!filesToProcess.isEmpty()) {
                try {
                    String file = filesToProcess.poll();
                    documentsToReturn.put(new FileDocument(file, true));
                    remaining.decrementAndGet();
                } catch (Exception e) {
                    bufferError = new RuntimeException(e);
                    throw new IOError(e);
                }
            }
        }
    }
}
TOP

Related Classes of edu.ucla.sspace.text.BufferedFileListDocumentIterator$Bufferer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.