Package org.apache.mahout.text

Source Code of org.apache.mahout.text.ReadOnlyFileSystemDirectory$FileSystemIndexOutput

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.mahout.text;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.lucene.store.BaseDirectory;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.BufferedIndexOutput;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.Lock;
import org.apache.lucene.store.LockFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.Collection;

//TODO: is there a better way of doing this in Lucene 4.x?

/**
* This class implements a read-only Lucene Directory on top of a general FileSystem.
* Currently it does not support locking.
* <p/>
* // TODO: Rename to FileSystemReadOnlyDirectory
*/
public class ReadOnlyFileSystemDirectory extends BaseDirectory {

  private final FileSystem fs;
  private final Path directory;
  private final int ioFileBufferSize;

  private static final Logger log = LoggerFactory.getLogger(ReadOnlyFileSystemDirectory.class);

      /**
       * Constructor
       *
       * @param fs - filesystem
       * @param directory - directory path
       * @param create - if true create the directory
       * @param conf - MR Job Configuration
       * @throws IOException
       */

  public ReadOnlyFileSystemDirectory(FileSystem fs, Path directory, boolean create,
                                     Configuration conf) throws IOException {

    this.fs = fs;
    this.directory = directory;
    this.ioFileBufferSize = conf.getInt("io.file.buffer.size", 4096);

    if (create) {
      create();
    }

    boolean isDir = false;
    try {
      FileStatus status = fs.getFileStatus(directory);
      if (status != null) {
        isDir = status.isDir();
      }
    } catch (IOException e) {
      log.error(e.getMessage(), e);
    }
    if (!isDir) {
      throw new IOException(directory + " is not a directory");
    }
  }


  private void create() throws IOException {
    if (!fs.exists(directory)) {
      fs.mkdirs(directory);
    }

    boolean isDir = false;
    try {
      FileStatus status = fs.getFileStatus(directory);
      if (status != null) {
        isDir = status.isDir();
      }
    } catch (IOException e) {
      log.error(e.getMessage(), e);
    }
    if (!isDir) {
      throw new IOException(directory + " is not a directory");
    }

    // clear old index files
    FileStatus[] fileStatus =
            fs.listStatus(directory, LuceneIndexFileNameFilter.getFilter());
    for (FileStatus status : fileStatus) {
      if (!fs.delete(status.getPath(), true)) {
        throw new IOException("Cannot delete index file "
                + status.getPath());
      }
    }
  }

  public String[] list() throws IOException {
    FileStatus[] fileStatus =
            fs.listStatus(directory, LuceneIndexFileNameFilter.getFilter());
    String[] result = new String[fileStatus.length];
    for (int i = 0; i < fileStatus.length; i++) {
      result[i] = fileStatus[i].getPath().getName();
    }
    return result;
  }

  @Override
  public String[] listAll() throws IOException {
    return list();
  }

  @Override
  public boolean fileExists(String name) throws IOException {
    return fs.exists(new Path(directory, name));
  }

  @Override
  public long fileLength(String name) throws IOException {
    return fs.getFileStatus(new Path(directory, name)).getLen();
  }

  @Override
  public void deleteFile(String name) throws IOException {
    if (!fs.delete(new Path(directory, name), true)) {
      throw new IOException("Cannot delete index file " + name);
    }
  }

  @Override
  public IndexOutput createOutput(String name, IOContext context) throws IOException {
    //TODO: What should we be doing with the IOContext here, if anything?
    Path file = new Path(directory, name);
    if (fs.exists(file) && !fs.delete(file, true)) {
      // delete the existing one if applicable
      throw new IOException("Cannot overwrite index file " + file);
    }

    return new FileSystemIndexOutput(file, ioFileBufferSize);
  }

  @Override
  public void sync(Collection<String> names) throws IOException {
    // do nothing, as this is read-only
  }

  @Override
  public IndexInput openInput(String name, IOContext context) throws IOException {
    return new FileSystemIndexInput(new Path(directory, name), ioFileBufferSize);
  }

  @Override
  public Lock makeLock(final String name) {
    return new Lock() {
      public boolean obtain() {
        return true;
      }

      public void release() {
      }

      public boolean isLocked() {
        throw new UnsupportedOperationException();
      }

      public String toString() {
        return "Lock@" + new Path(directory, name);
      }
    };
  }

  @Override
  public void clearLock(String name) throws IOException {
    // do nothing
  }

  @Override
  public void close() throws IOException {
    // do not close the file system
  }

  @Override
  public void setLockFactory(LockFactory lockFactory) throws IOException {
    // do nothing
  }

  @Override
  public LockFactory getLockFactory() {
    return null;
  }

  @Override
  public String toString() {
    return this.getClass().getName() + "@" + directory;
  }

  private class FileSystemIndexInput extends BufferedIndexInput implements Cloneable {

    // shared by clones
    private class Descriptor {
      public final FSDataInputStream in;
      public long position; // cache of in.getPos()

      public Descriptor(Path file, int ioFileBufferSize) throws IOException {
        this.in = fs.open(file, ioFileBufferSize);
      }
    }

    private final Path filePath; // for debugging
    private final Descriptor descriptor;
    private final long length;
    private boolean isOpen;
    private boolean isClone;

    public FileSystemIndexInput(Path path, int ioFileBufferSize)
      throws IOException {
      super("FSII_" + path.getName(), ioFileBufferSize);
      filePath = path;
      descriptor = new Descriptor(path, ioFileBufferSize);
      length = fs.getFileStatus(path).getLen();
      isOpen = true;
    }

    @Override
    protected void readInternal(byte[] b, int offset, int len)
      throws IOException {
      long position = getFilePointer();
      if (position != descriptor.position) {
        descriptor.in.seek(position);
        descriptor.position = position;
      }
      int total = 0;
      do {
        int i = descriptor.in.read(b, offset + total, len - total);
        if (i == -1) {
          throw new IOException("Read past EOF");
        }
        descriptor.position += i;
        total += i;
      } while (total < len);
    }

    @Override
    public void close() throws IOException {
      if (!isClone) {
        if (isOpen) {
          descriptor.in.close();
          isOpen = false;
        } else {
          throw new IOException("Index file " + filePath + " already closed");
        }
      }
    }

    @Override
    protected void seekInternal(long position) {
      // handled in readInternal()
    }

    @Override
    public long length() {
      return length;
    }

    @Override
    protected void finalize() throws Throwable {
      super.finalize();
      if (!isClone && isOpen) {
        close(); // close the file
      }
    }

    @Override
    public BufferedIndexInput clone() {
      FileSystemIndexInput clone = (FileSystemIndexInput) super.clone();
      clone.isClone = true;
      return clone;
    }
  }

  private class FileSystemIndexOutput extends BufferedIndexOutput {

    private final Path filePath; // for debugging
    private final FSDataOutputStream out;
    private boolean isOpen;

    public FileSystemIndexOutput(Path path, int ioFileBufferSize)
      throws IOException {
      filePath = path;
      // overwrite is true by default
      out = fs.create(path, true, ioFileBufferSize);
      isOpen = true;
    }

    @Override
    public void flushBuffer(byte[] b, int offset, int size) throws IOException {
      out.write(b, offset, size);
    }

    @Override
    public void close() throws IOException {
      if (isOpen) {
        super.close();
        out.close();
        isOpen = false;
      } else {
        throw new IOException("Index file " + filePath + " already closed");
      }
    }

    @Override
    public void seek(long pos) throws IOException {
      throw new UnsupportedOperationException();
    }

    @Override
    public long length() throws IOException {
      return out.getPos();
    }

    @Override
    protected void finalize() throws Throwable {
      super.finalize();
      if (isOpen) {
        close(); // close the file
      }
    }
  }

}
TOP

Related Classes of org.apache.mahout.text.ReadOnlyFileSystemDirectory$FileSystemIndexOutput

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.