Package net.sf.samtools

Source Code of net.sf.samtools.CRAMFileReader

package net.sf.samtools;

import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.NoSuchElementException;

import net.sf.cram.ReadWrite;
import net.sf.cram.ReadWrite.CramHeader;
import net.sf.cram.io.CountingInputStream;
import net.sf.cram.structure.Container;
import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.samtools.BAMFileReader.QueryType;
import net.sf.samtools.SAMFileHeader.SortOrder;
import net.sf.samtools.SAMFileReader.ValidationStringency;
import net.sf.samtools.util.CloseableIterator;
import net.sf.samtools.util.RuntimeEOFException;
import net.sf.samtools.util.SeekableFileStream;
import net.sf.samtools.util.SeekableStream;

public class CRAMFileReader extends SAMFileReader.ReaderImplementation {
  private File file;
  private ReferenceSequenceFile referenceSequenceFile;
  private CramHeader header;
  private InputStream is;
  private SAMRecordIterator it;
  private BAMIndex mIndex;
  private File mIndexFile;
  private boolean mEnableIndexCaching;
  private File mIndexStream;
  private boolean mEnableIndexMemoryMapping;

  private ValidationStringency validationStringency;

  public CRAMFileReader(File file, InputStream is,
      ReferenceSequenceFile referenceSequenceFile) {
    this.file = file;
    this.is = is;
    this.referenceSequenceFile = referenceSequenceFile;

    if (file == null)
      getIterator();
  }

  public CRAMFileReader(File bamFile, File indexFile,
      ReferenceSequenceFile referenceSequenceFile) {
    this.file = bamFile;
    this.mIndexFile = indexFile;
    this.referenceSequenceFile = referenceSequenceFile;

    if (file == null)
      getIterator();
  }

  private void readHeader() throws FileNotFoundException, IOException {
    header = ReadWrite.readCramHeader(new FileInputStream(file));
  }

  @Override
  void enableFileSource(SAMFileReader reader, boolean enabled) {
    // throw new RuntimeException("Not implemented.");
  }

  @Override
  void enableIndexCaching(boolean enabled) {
    // throw new RuntimeException("Not implemented.");
  }

  @Override
  void enableIndexMemoryMapping(boolean enabled) {
    // throw new RuntimeException("Not implemented.");
  }

  @Override
  void enableCrcChecking(boolean enabled) {
    // throw new RuntimeException("Not implemented.");
  }

  @Override
  void setSAMRecordFactory(SAMRecordFactory factory) {
  }

  @Override
  boolean hasIndex() {
    return mIndex != null || mIndexFile != null || mIndexStream != null;
  }

  @Override
  BAMIndex getIndex() {
    if (!hasIndex())
      throw new SAMException("No index is available for this BAM file.");
    if (mIndex == null) {
      if (mIndexFile != null)
        mIndex = mEnableIndexCaching ? new CachingBAMFileIndex(
            mIndexFile, getFileHeader().getSequenceDictionary(),
            mEnableIndexMemoryMapping) : new DiskBasedBAMFileIndex(
            mIndexFile, getFileHeader().getSequenceDictionary(),
            mEnableIndexMemoryMapping);
      else
        mIndex = mEnableIndexCaching ? new CachingBAMFileIndex(
            mIndexStream, getFileHeader().getSequenceDictionary())
            : new DiskBasedBAMFileIndex(mIndexStream,
                getFileHeader().getSequenceDictionary());
    }
    return mIndex;
  }

  @Override
  SAMFileHeader getFileHeader() {
    try {
      if (header == null)
        readHeader();
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
    return header.samFileHeader;
  }

  @Override
  CloseableIterator<SAMRecord> getIterator() {
    if (it != null && file == null)
      return it;
    try {
      SAMIterator si = null;
      if (file != null)
        si = new SAMIterator(new FileInputStream(file),
            referenceSequenceFile);
      else
        si = new SAMIterator(is, referenceSequenceFile);

      si.setValidationStringency(validationStringency);
      header = si.getCramHeader();
      it = si;
      return it;
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }

  @Override
  CloseableIterator<SAMRecord> getIterator(SAMFileSpan fileSpan) {
    throw new RuntimeException("Not implemented.");
  }

  @Override
  SAMFileSpan getFilePointerSpanningReads() {
    throw new RuntimeException("Not implemented.");
  }

  @Override
  CloseableIterator<SAMRecord> query(String sequence, int start, int end,
      boolean contained) {
    CloseableIterator<SAMRecord> iterator = queryAlignmentStart(sequence,
        start);

    QueryType qt = QueryType.CONTAINED;
    if (!contained)
      qt = QueryType.OVERLAPPING;
    if (end == -1)
      qt = QueryType.STARTING_AT;
    return new BAMQueryFilteringIterator(iterator, sequence, start, end, qt);
  }

  private static SAMRecordIterator emptyIterator = new SAMRecordIterator() {

    @Override
    public boolean hasNext() {
      return false;
    }

    @Override
    public SAMRecord next() {
      throw new RuntimeException("No records.");
    }

    @Override
    public void remove() {
      throw new RuntimeException("Remove not supported.");
    }

    @Override
    public void close() {
    }

    @Override
    public SAMRecordIterator assertSorted(SortOrder sortOrder) {
      // TODO Auto-generated method stub
      return null;
    }
  };

  @Override
  public CloseableIterator<SAMRecord> queryAlignmentStart(String sequence,
      int start) {
    long[] filePointers = null;

    // Hit the index to determine the chunk boundaries for the required
    // data.
    final SAMFileHeader fileHeader = getFileHeader();
    final int referenceIndex = fileHeader.getSequenceIndex(sequence);
    if (referenceIndex != -1) {
      final BAMIndex fileIndex = getIndex();
      final BAMFileSpan fileSpan = fileIndex.getSpanOverlapping(
          referenceIndex, start, -1);
      filePointers = fileSpan != null ? fileSpan.toCoordinateArray()
          : null;
    }

    if (filePointers == null || filePointers.length == 0)
      return emptyIterator;

    SeekableStream s = null;
    if (file != null) {
      try {
        s = new SeekableFileStream(file);
      } catch (FileNotFoundException e) {
        throw new RuntimeException(e);
      }
    } else if (is instanceof SeekableStream)
      s = (SeekableStream) is;

    SAMIterator si = null;
    try {
      s.seek(0);
      si = new SAMIterator(s, referenceSequenceFile);
      si.setValidationStringency(validationStringency);
      it = si;
    } catch (IOException e) {
      throw new RuntimeEOFException(e);
    }

    Container c = null;
    for (int i = 0; i < filePointers.length; i += 2) {
      long containerOffset = filePointers[i] >>> 16;
      int sliceIndex = (int) ((filePointers[i] << 48) >>> 48);
      try {
        s.seek(containerOffset);
        // the following is not optimal because this is container-level
        // access, not slice:

        // CountingInputStream cis = new CountingInputStream(s) ;
        c = ReadWrite.readContainerHeader(s);
        // long headerSize = cis.getCount() ;
        // int sliceOffset = c.landmarks[sliceIndex] ;
        if (c.alignmentStart + c.alignmentSpan > start) {
          s.seek(containerOffset);
          // s.seek(containerOffset + headerSize + sliceOffset);
          return si;
        }
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }

    it = emptyIterator;
    return it;
  }

  @Override
  public CloseableIterator<SAMRecord> queryUnmapped() {
    final long startOfLastLinearBin = getIndex().getStartOfLastLinearBin();

    SeekableStream s = null;
    if (file != null) {
      try {
        s = new SeekableFileStream(file);
      } catch (FileNotFoundException e) {
        throw new RuntimeException(e);
      }
    } else if (is instanceof SeekableStream)
      s = (SeekableStream) is;

    SAMIterator si = null;
    try {
      s.seek(0);
      si = new SAMIterator(s, referenceSequenceFile);
      si.setValidationStringency(validationStringency);
      s.seek(startOfLastLinearBin);
      it = si;
    } catch (IOException e) {
      throw new RuntimeEOFException(e);
    }

    return it;
  }

  @Override
  void close() {
    if (it != null)
      it.close();
    if (is != null)
      try {
        is.close();
      } catch (IOException e) {
      }

    if (mIndex != null)
      mIndex.close();
  }

  @Override
  void setValidationStringency(ValidationStringency validationStringency) {
    this.validationStringency = validationStringency;
  }

  @Override
  ValidationStringency getValidationStringency() {
    return validationStringency;
  }

  public static boolean isCRAMFile(File file) throws IOException {
    final int buffSize = CramHeader.magick.length;
    FileInputStream fis = new FileInputStream(file);
    DataInputStream dis = new DataInputStream(fis);
    final byte[] buffer = new byte[buffSize];
    dis.readFully(buffer);
    dis.close();

    return Arrays.equals(buffer, CramHeader.magick);
  }

  /**
   * A decorating iterator that filters out records that are outside the
   * bounds of the given query parameters.
   */
  private class BAMQueryFilteringIterator implements
      CloseableIterator<SAMRecord> {
    /**
     * The wrapped iterator.
     */
    private final CloseableIterator<SAMRecord> wrappedIterator;

    /**
     * The next record to be returned. Will be null if no such record
     * exists.
     */
    private SAMRecord mNextRecord;

    private final int mReferenceIndex;
    private final int mRegionStart;
    private final int mRegionEnd;
    private final QueryType mQueryType;
    private boolean isClosed = false;

    public BAMQueryFilteringIterator(
        final CloseableIterator<SAMRecord> iterator,
        final String sequence, final int start, final int end,
        final QueryType queryType) {
      this.wrappedIterator = iterator;
      final SAMFileHeader fileHeader = getFileHeader();
      mReferenceIndex = fileHeader.getSequenceIndex(sequence);
      mRegionStart = start;
      if (queryType == QueryType.STARTING_AT) {
        mRegionEnd = mRegionStart;
      } else {
        mRegionEnd = (end <= 0) ? Integer.MAX_VALUE : end;
      }
      mQueryType = queryType;
      mNextRecord = advance();
    }

    /**
     * Returns true if a next element exists; false otherwise.
     */
    public boolean hasNext() {
      if (isClosed)
        throw new IllegalStateException("Iterator has been closed");
      return mNextRecord != null;
    }

    /**
     * Gets the next record from the given iterator.
     *
     * @return The next SAM record in the iterator.
     */
    public SAMRecord next() {
      if (!hasNext())
        throw new NoSuchElementException(
            "BAMQueryFilteringIterator: no next element available");
      final SAMRecord currentRead = mNextRecord;
      mNextRecord = advance();
      return currentRead;
    }

    /**
     * Closes down the existing iterator.
     */
    public void close() {
      isClosed = true;
    }

    /**
     * @throws UnsupportedOperationException
     *             always.
     */
    public void remove() {
      throw new UnsupportedOperationException("Not supported: remove");
    }

    SAMRecord advance() {
      while (true) {
        // Pull next record from stream
        if (!wrappedIterator.hasNext())
          return null;

        final SAMRecord record = wrappedIterator.next();
        // If beyond the end of this reference sequence, end iteration
        final int referenceIndex = record.getReferenceIndex();
        if (referenceIndex != mReferenceIndex) {
          if (referenceIndex < 0 || referenceIndex > mReferenceIndex) {
            return null;
          }
          // If before this reference sequence, continue
          continue;
        }
        if (mRegionStart == 0 && mRegionEnd == Integer.MAX_VALUE) {
          // Quick exit to avoid expensive alignment end calculation
          return record;
        }
        final int alignmentStart = record.getAlignmentStart();
        // If read is unmapped but has a coordinate, return it if the
        // coordinate is within
        // the query region, regardless of whether the mapped mate will
        // be returned.
        final int alignmentEnd;
        if (mQueryType == QueryType.STARTING_AT) {
          alignmentEnd = -1;
        } else {
          alignmentEnd = (record.getAlignmentEnd() != SAMRecord.NO_ALIGNMENT_START ? record
              .getAlignmentEnd() : alignmentStart);
        }

        if (alignmentStart > mRegionEnd) {
          // If scanned beyond target region, end iteration
          return null;
        }
        // Filter for overlap with region
        if (mQueryType == QueryType.CONTAINED) {
          if (alignmentStart >= mRegionStart
              && alignmentEnd <= mRegionEnd) {
            return record;
          }
        } else if (mQueryType == QueryType.OVERLAPPING) {
          if (alignmentEnd >= mRegionStart
              && alignmentStart <= mRegionEnd) {
            return record;
          }
        } else {
          if (alignmentStart == mRegionStart) {
            return record;
          }
        }
      }
    }
  }
}
TOP

Related Classes of net.sf.samtools.CRAMFileReader

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.