Package abra

Source Code of abra.Sam2Fastq

/* Copyright 2013 University of North Carolina at Chapel Hill.  All rights reserved. */
package abra;

import java.io.File;
import java.io.IOException;
import java.util.List;


import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileHeader.SortOrder;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMFileWriterFactory;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileReader.ValidationStringency;

/**
* Responsible for creating fastq files for reads of interest to be realigned.
* Those reads that are not eligible for realignment are written directly to the output BAM file.
*
* @author Lisle Mose (lmose at unc dot edu)
*/
public class Sam2Fastq {
 
  public static final String FIELD_DELIMITER = "~|";
  private static final int MAX_SAM_READ_NAME_LENGTH = 255;
 
  public static final int MIN_OFF_TARGET_MAPQ = 30;
 
  private FastqOutputFile output1;
  private ReverseComplementor reverseComplementor = new ReverseComplementor();
  private boolean shouldIdentifyEndByReadId = false;
  private String end1Suffix;
  private String end2Suffix;
  private RegionTracker regionTracker;
   
  /**
   * Convert the input SAM/BAM file into a single fastq file.
   * Input SAM files that contain multiple mappings should be sorted by read name.
   */
  public void convert(String inputSam, String outputFastq, CompareToReference2 c2r,
      SAMFileWriter writer, boolean isPairedEnd,
      List<Feature> regions, int minMappingQuality) throws IOException {
   
    System.out.println("sam: " + inputSam);
   
        SAMFileReader reader = new SAMFileReader(new File(inputSam));
        reader.setValidationStringency(ValidationStringency.SILENT);
       
        output1 = new FastqOutputFile();
        output1.init(outputFastq);
        int lineCnt = 0;
       
        this.regionTracker = new RegionTracker(regions, reader.getFileHeader());
       
        for (SAMRecord read : reader) {
        if (SAMRecordUtils.isPrimary(read) && (!SAMRecordUtils.isFiltered(isPairedEnd, read))) {
         
          // These tags can be lengthy, so remove them.
          // TODO: Improve the way this is handled
          read.setAttribute("XA", null);
          read.setAttribute("OQ", null);
          read.setAttribute("MD", null);
          read.setAttribute("BQ", null);
         
          int yx = 0;
         
          boolean isAmbiguous = !read.getReadUnmappedFlag() && read.getMappingQuality() == 0;
         
          if ((!read.getReadFailsVendorQualityCheckFlag()) && (!isAmbiguous)) {
            // Calculate the number of mismatches to reference for this read.
            if (c2r != null) {
              try {
                yx = SAMRecordUtils.getEditDistance(read, c2r);
              } catch (ArrayIndexOutOfBoundsException e) {
                System.out.println("Index error for read: " + read.getSAMString());
                throw e;
              }
            } else {
              yx = read.getReadLength();
            }
           
            read.setAttribute("YX", yx);
          }

          boolean offTargetFiltered = false;
        if (yx > 0 && !read.getReadUnmappedFlag() && read.getMappingQuality() < MIN_OFF_TARGET_MAPQ && !regionTracker.isInRegion(read)) {
          read.setAttribute("YR", 2);
          offTargetFiltered = true;
//          System.out.println("Filtering off target: " + read.getSAMString());
        }
         
          if ((yx > 0 && !offTargetFiltered && read.getMappingQuality() >= minMappingQuality) || (read.getReadUnmappedFlag())) {
           
            if ((!read.getReadUnmappedFlag()) && (!regionTracker.isInRegion(read))) {
              read.setAttribute("YR", 1);
            }
           
            // Eligible for realignment, output FASTQ record
            output1.write(samReadToFastqRecord(read, c2r));
          } else if (writer != null) {
            // Either xactly matches reference or failed vendor QC or low mapq, so
            // output directly to final BAM
            writer.addAlignment(read);
          }
        }
       
            lineCnt++;
            if ((lineCnt % 1000000) == 0) {
                System.out.println("record: " + lineCnt);
            }
        }
               
        output1.close();
        reader.close();
  }
 
  private FastqRecord samReadToFastqRecord(SAMRecord read, CompareToReference2 c2r) {
   
    String bases = read.getReadString();
    String qualities = read.getBaseQualityString();
   
    if (read.getReadNegativeStrandFlag()) {
      bases = reverseComplementor.reverseComplement(bases);
      qualities = reverseComplementor.reverse(qualities);
    }
   
    read.setReadString("");
    read.setBaseQualityString("");
   
    String readStr = read.getSAMString();
/*   
    if (readStr.length() > MAX_SAM_READ_NAME_LENGTH) {
      String msg = "Warning!  Max SAM Read name length exceeded for: " + readStr;
      System.out.println(msg);
//      throw new RuntimeException(msg);
    }
*/

    readStr = readStr.replace("\t", FIELD_DELIMITER);
   
    // Trim newline if applicable
    if (readStr.charAt(readStr.length()-1) == '\n') {
      readStr = readStr.substring(0, readStr.length()-1);
    }
   
    String readName = readStr;
   
    FastqRecord fastq = new FastqRecord("@" + readName, bases, qualities);
   
    return fastq;
  }
 
  private boolean isFusion(SAMRecord read) {
    return read.getAttribute("ZF") != null;
  }
 
  private boolean isFirstInPair(SAMRecord read) {
    boolean isFirstInPair;
   
    if (shouldIdentifyEndByReadId) {
      isFirstInPair = read.getReadName().endsWith(end1Suffix);
     
    } else {
      isFirstInPair = read.getFirstOfPairFlag();
    }
   
    return isFirstInPair;
  }
 
  private boolean isSecondInPair(SAMRecord read) {
    boolean isSecondInPair;
   
    if (shouldIdentifyEndByReadId) {
      isSecondInPair = read.getReadName().endsWith(end2Suffix);
     
    } else {
      isSecondInPair = read.getSecondOfPairFlag();
    }
   
    return isSecondInPair;
  }
 
  public void setEndSuffixes(String end1Suffix, String end2Suffix) {
    this.shouldIdentifyEndByReadId = true;
    this.end1Suffix = end1Suffix;
    this.end2Suffix = end2Suffix;
  }

/*
  public static void run(String[] args) throws IOException {
    Sam2FastqOptions options = new Sam2FastqOptions();
    options.parseOptions(args);
   
    if (options.isValid()) {
      long s = System.currentTimeMillis();
      System.out.println("sam2fastq starting");
     
      Sam2Fastq sam2Fastq = new Sam2Fastq();
      if (options.isPairedEnd()) {
       
        if (options.shouldIdEndByReadName()) {
          sam2Fastq.setEndSuffixes(options.getEnd1Suffix(), options.getEnd2Suffix());
        }
       
        sam2Fastq.setMapspliceFusions(options.isMapspliceFusions());
       
        sam2Fastq.convert(options.getInputFile(), options.getFastq1(), options.getFastq2());
      } else {
        sam2Fastq.convert(options.getInputFile(), options.getFastq1());
      }
     
      long e = System.currentTimeMillis();
      System.out.println("sam2fastq done.  Elapsed secs: " + (e-s)/1000);
    }
  }
*/
/* 
  public static void main(String[] args) throws Exception {
    String argz = "--end1 /1 --end2 /2 --in /home/lisle/sam2fastq/round2/test2.sam  --fastq1 /home/lisle/sam2fastq/round2/1.fastq --fastq2 /home/lisle/sam2fastq/round2/2.fastq --mapsplice";
   
    run(argz.split(" "));
  }
*/
  public static void main(String[] args) throws Exception {
   
//    String inputSam = "/home/lmose/dev/abra/region_tracker/normal.sort.bam";
    String inputSam = "/home/lmose/dev/abra/region_tracker/chr12.small.bam";
    SAMFileReader reader = new SAMFileReader(new File(inputSam));
    SAMFileHeader header = reader.getFileHeader();
    reader.close();
       
    CompareToReference2 c2r = null;
   
    SAMFileWriterFactory writerFactory = new SAMFileWriterFactory();
//    writerFactory.setUseAsyncIo(true);
   
    header.setSortOrder(SortOrder.unsorted);
   
    SAMFileWriter writer = writerFactory.makeSAMOrBAMWriter(
        header, false, new File("/home/lmose/dev/abra/region_tracker/out.bam"));
   
    Sam2Fastq s2f = new Sam2Fastq();
   
   
    RegionLoader loader = new RegionLoader();
    List<Feature> regions = loader.load("/home/lmose/dev/abra/region_tracker/uncseq5.bed");
   
    regions = RegionLoader.collapseRegions(regions, 100);
   
    regions = ReAligner.splitRegions(regions);   
   
    long s = System.currentTimeMillis();
    s2f.convert(inputSam, "/home/lmose/dev/abra/region_tracker/t2.fastq.gz", c2r, writer, false,
        regions, 20);
    long e = System.currentTimeMillis();
   
   
    writer.close();
   
    System.out.println("Elapsed: " + (e-s)/1000);
   
   
//    s2f.convert("/home/lmose/dev/abra_wxs/21_1071/small_tumor.abra.bam", "/home/lmose/dev/abra_wxs/21_1071/t.fastq", c2r);
  }

/*
  public static void main(String[] args) throws Exception {
   
    String inputSam = "/home/lmose/dev/ayc/opt/opt2/s2f/t1.bam";
    SAMFileReader reader = new SAMFileReader(new File(inputSam));
    SAMFileHeader header = reader.getFileHeader();
    reader.close();
       
    CompareToReference2 c2r = new CompareToReference2();
    c2r.init("/home/lmose/reference/chr1/chr1.fa");
   
    SAMFileWriterFactory writerFactory = new SAMFileWriterFactory();
//    writerFactory.setUseAsyncIo(true);
   
    header.setSortOrder(SortOrder.unsorted);
   
    SAMFileWriter writer = writerFactory.makeSAMOrBAMWriter(
        header, false, new File("/home/lmose/dev/ayc/opt/opt2/s2f/output_t1.bam"));
   
    Sam2Fastq s2f = new Sam2Fastq();
   
   
    RegionLoader loader = new RegionLoader();
    List<Feature> regions = loader.load("/home/lmose/dev/ayc/regions/clinseq5/uncseq5.gtf");
   
    regions = RegionLoader.collapseRegions(regions, 100);
   
    regions = ReAligner.splitRegions(regions);   
   
    long s = System.currentTimeMillis();
    s2f.convert(inputSam, "/home/lmose/dev/ayc/opt/opt2/s2f/t2.fastq.gz", c2r, writer, false,
        regions);
    long e = System.currentTimeMillis();
   
   
    writer.close();
   
    System.out.println("Elapsed: " + (e-s)/1000);
   
   
//    s2f.convert("/home/lmose/dev/abra_wxs/21_1071/small_tumor.abra.bam", "/home/lmose/dev/abra_wxs/21_1071/t.fastq", c2r);
  }
  */
 
  /*
  public static void main(String[] args) throws Exception {
   
    String inputSam = "/home/lmose/dev/ayc/opt/t7.bam";
    SAMFileReader reader = new SAMFileReader(new File(inputSam));
    SAMFileHeader header = reader.getFileHeader();
    reader.close();
       
    CompareToReference2 c2r = new CompareToReference2();
    c2r.init("/home/lmose/reference/chr7/chr7.fa");
   
    SAMFileWriterFactory writerFactory = new SAMFileWriterFactory();
//    writerFactory.setUseAsyncIo(true);
   
    header.setSortOrder(SortOrder.unsorted);
   
    SAMFileWriter writer = writerFactory.makeSAMOrBAMWriter(
        header, false, new File("/home/lmose/dev/ayc/opt/output_t7.bam"));
   
    Sam2Fastq s2f = new Sam2Fastq();
   
    long s = System.currentTimeMillis();
   
   
    System.out.println("chr1: " + header.getSequenceIndex("chr1"));
    System.out.println("chr2: " + header.getSequenceIndex("chr2"));
    System.out.println("chrY: " + header.getSequenceIndex("chrY"));
    System.out.println("chrM: " + header.getSequenceIndex("chrM"));
   
//    String inputSam, String outputFastq, CompareToReference2 c2r,
//    SAMFileHeader header, SAMFileWriter writer, ReAligner realigner
   
    GtfLoader loader = new GtfLoader();
    List<Feature> regions = loader.load("/home/lmose/dev/ayc/regions/clinseq5/uncseq5.gtf");
   
    regions = ReAligner.collapseRegions(regions, 100);
   
    regions = ReAligner.splitRegions(regions);   
   
    s2f.convert(inputSam, "/home/lmose/dev/ayc/opt/t7.fastq.gz", c2r, header, writer, false,
        regions);
    long e = System.currentTimeMillis();
   
    System.out.println("Elapsed: " + (e-s)/1000);
   
    writer.close();
   
//    s2f.convert("/home/lmose/dev/abra_wxs/21_1071/small_tumor.abra.bam", "/home/lmose/dev/abra_wxs/21_1071/t.fastq", c2r);
  }
  */
TOP

Related Classes of abra.Sam2Fastq

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.