Package abra

Source Code of abra.CombineChimera3$ReadComparator

/* Copyright 2013 University of North Carolina at Chapel Hill.  All rights reserved. */
package abra;

import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMFileWriterFactory;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileHeader.SortOrder;

/**
* Combines chimeric alignments caused by indels when appropriate
*
* @author Lisle E. Mose (lmose at unc dot edu)
*/
public class CombineChimera3 {
 
  // TODO: Revisit this requirement. Consider removing when
  // introducing dynamic complex region identification
  public static final int MAX_GAP_LENGTH = 2000;
 
  // Minimum number of non clipped bases that must be between an identified indel and
  // the beginning/end of the contig
  int minIndelBuffer;
 
  public void combine(String input, String output, int minIndelBuffer, CompareToReference2 c2r) {
    IndelShifter indelShifter = new IndelShifter();
   
    this.minIndelBuffer = minIndelBuffer;
    SamMultiMappingReader reader = new SamMultiMappingReader(input);
   
    SAMFileHeader header = reader.getFileHeader();
    header.setSortOrder(SortOrder.unsorted);
   
    SAMFileWriter outputReadsBam = new SAMFileWriterFactory().makeSAMOrBAMWriter(
        header, true, new File(output));

    for (List<SAMRecord> readList : reader) {
      List<SAMRecord> processedReads = processRead(readList);
     
      for (SAMRecord read : processedReads) {
        outputReadsBam.addAlignment(indelShifter.shiftIndelsLeft(read, c2r));
      }
    }
   
    outputReadsBam.close();
    reader.close();
   
    System.out.println("Done combining chimeric reads.");
  }
 
  // SAMRecords in the input represent chimera for the same read
  protected List<SAMRecord> processRead(List<SAMRecord> readList) {
    List<SAMRecord> reads = null;
    boolean isCombined = false;
   
    for (SAMRecord read : readList) {
      read.clearAttributes();
      SAMRecordUtils.replaceHardClips(read);
    }
   
    sortReadsByPosition(readList);
   
    pruneLikelyInserts(readList);
   
    if (readList.size() == 2) {
      SAMRecord read1 = readList.get(0);
      SAMRecord read2 = readList.get(1);
     
      // Look for same reference, strand and multipart Cigar
      if ((read1.getReferenceName().equals(read2.getReferenceName())) &&
        (read1.getReadNegativeStrandFlag() == read2.getReadNegativeStrandFlag()) &&
        (read1.getCigarLength() >= 2) &&
        (read2.getCigarLength() >= 2) &&
        (Math.abs(read1.getAlignmentStart()-read2.getAlignmentStart()) < MAX_GAP_LENGTH)) {
       
        SAMRecord combinedRead = combineChimericReads(read1, read2);
        if (combinedRead != null) {
          isCombined = true;
          reads = new ArrayList<SAMRecord>();
          reads.add(combinedRead);
        }
      }
    }
   
    if (!isCombined) {
      reads = readList;
    }
   
    return reads;
  }
 
  private SAMRecord getTopHit(SAMRecord read1, SAMRecord read2) {
    SAMRecord topHit = null;
   
    if (isTopHit(read1)) {
      topHit = read1;
    } else if (isTopHit(read2)) {
      topHit = read2;
    }
   
    return topHit;
  }
 
  private boolean isTopHit(SAMRecord read) {
    // 0x800 = supplementary alignment from SAM spec v1.5
    return ((read.getFlags() & 0x800== 0) && (!read.getNotPrimaryAlignmentFlag());
  }
 
  private SAMRecord combineChimericReads(SAMRecord read1, SAMRecord read2) {
    SAMRecord left = null;
    SAMRecord right = null;
   
    if (read1.getAlignmentStart() < read2.getAlignmentStart()) {
      left = read1;
      right = read2;
    } else {
      left = read2;
      right = read1;
    }
   
    Cigar leftCigar = left.getCigar();
    Cigar rightCigar = right.getCigar();
   
    SAMRecord topHit = getTopHit(read1, read2);
   
    if ((rightCigar.getCigarElement(0).getOperator() == CigarOperator.S) &&
      (leftCigar.getCigarElement(left.getCigarLength()-1).getOperator() == CigarOperator.S) &&
      (topHit != null)) {
     
      List<CigarElement> leftElements = new ArrayList<CigarElement>();
      List<CigarElement> rightElements = new ArrayList<CigarElement>();
     
      // Drop trailing S on left side
      for (int i=0; i<left.getCigar().numCigarElements()-1; i++) {
        leftElements.add(leftCigar.getCigarElement(i));
      }

      // Drop leading S on right side
      for (int i=1; i<rightCigar.numCigarElements(); i++) {
        rightElements.add(rightCigar.getCigarElement(i));
      }
     
      // Encountered in dream test data at: chr20  22137016.  Not clear how to interpret.
      if (rightElements.get(0).getOperator() == CigarOperator.INSERTION) {
        return null;
      }
     
      // If total element length is longer than the read, then trim first element
      // on the left side of the indel (this is likely a deletion??)
//      if (totalLength > read1.getReadLength()) {
//        // Trim from the right side of the rightmos block of the left elements
//        int trimLength = totalLength - read1.getReadLength();
//        trimmedElemLength = trimRightmostElement(rightElements, trimLength);
//      }
//     
//      if (trimmedElemLength < 0) {
//        // We don't currently handle the case where we need to trim more
//        // than the length of the element
//        return null;
//      }
     
      List<ReadBlock> leftBlocks = ReadBlock.getReadBlocks(left);
      ReadBlock lastLeftBlock = leftBlocks.get(leftBlocks.size()-2);
     
      List<ReadBlock> rightBlocks = ReadBlock.getReadBlocks(right);
      ReadBlock firstRightBlock = rightBlocks.get(1);
     
      // Confirm no shared bases in read
//      int leftStop = lastLeftBlock.getReadStart() + lastLeftBlock.getLength() - 1;
//      int rightStart = firstRightBlock.getReadStart();
     
      int leftStop = lastLeftBlock.getReferenceStop();
      int rightStart = firstRightBlock.getReferenceStart();
     
      int leftReadStop = lastLeftBlock.getReadStart() + lastLeftBlock.getLength() - 1;
      int rightReadStart = firstRightBlock.getReadStart();

     
      int trimLength = 0;
      if ((leftStop >= rightStart) || (leftReadStop >= rightReadStart)) {
        trimLength = Math.max(leftStop - rightStart + 1, leftReadStop - rightReadStart + 1);
        int trimmedElemLength = trimRightmostElement(leftElements, trimLength);
        if (trimmedElemLength < 1) {
          // This isn't really an indel.  Just alternate mappings.
//          System.out.println("Element trimmed too far. read1: [" + read1.getSAMString() +
//              "] read2: [" + read2.getSAMString() + "]");
          return null;
        }
      }
     
      // Build indel
      int leftAlignmentStop = lastLeftBlock.getReferenceStop();
      leftAlignmentStop -= trimLength;
      int rightAlignmentStart = firstRightBlock.getReferenceStart();
     
      leftReadStop -= trimLength;
     
      int alignmentGap = rightAlignmentStart - leftAlignmentStop - 1;
      int readGap = rightReadStart - leftReadStop - 1;
     
      CigarElement indelElement;
      if ((alignmentGap > 0) && (readGap == 0)) {
        // Deletion
        indelElement = new CigarElement(alignmentGap, CigarOperator.DELETION);
      } else {
        int totalLength = this.getTotalLength(leftElements, rightElements);
//        int insertLen = read1.getReadLength() - totalLength;
        int insertLen = read1.getReadLength() - totalLength - alignmentGap;
        int insertLen2 = readGap - alignmentGap;
       
        if (insertLen != insertLen2) {
          // Not really an insert
          return null;
        }
       
        //TODO: Take a closer look at this
        if (insertLen < 1) {
          return null;
        }
       
        // Right side may have skipped SNPs, so pad it.
        if (alignmentGap > 0) {
          this.padLeftmostElement(rightElements, alignmentGap);
        }
       
        //TODO: Necessary for mismatches?
        /*
        if ((readGap > 0) && ((totalLength + insertLen) < read1.getReadLength())) {
          if (readGap != read1.getReadLength() - totalLength - insertLen) {
            System.out.println("WARNING: Invalid read gap padding: " + read1.getSAMString());
          }
          this.padLeftmostElement(rightElements, readGap);
        }
        */
       
        indelElement = new CigarElement(insertLen, CigarOperator.INSERTION);
      }
     
      // Check to see if the indel is surrounded by non-clipped bases
      if (minIndelBuffer > 0) {
        if ((getNonSoftClippedLength(leftElements) < minIndelBuffer) ||
          (getNonSoftClippedLength(rightElements) < minIndelBuffer)) {
          return null;
        }
      }
     
      List<CigarElement> elements = new ArrayList<CigarElement>();
      elements.addAll(leftElements);
      elements.add(indelElement);
      elements.addAll(rightElements);
     
      // Create combined read
      SAMRecord combinedRead = cloneRead(topHit);
      combinedRead.setAlignmentStart(leftBlocks.get(0).getReferenceStart());
      combinedRead.setCigar(new Cigar(elements));
      combinedRead.setMappingQuality((read1.getMappingQuality() + read2.getMappingQuality()) / 2);
//      combinedRead.setMappingQuality(Math.min(read1.getMappingQuality(), read2.getMappingQuality()));

      //TODO: Any other ancilliary info to set?
     
      return combinedRead;
    }
   
    return null;
  }
 
  private SAMRecord cloneRead(SAMRecord read) {
    try {
      return (SAMRecord) read.clone();
    } catch (CloneNotSupportedException e) {
      // Infamous "this should never happen" comment here.
      e.printStackTrace();
      throw new RuntimeException(e);
    }
  }
 
  private int trimRightmostElement(List<CigarElement> elements, int trimLength) {
    CigarElement toTrim = elements.get(elements.size()-1);
    int newLength = toTrim.getLength() - trimLength;
    CigarElement replacement = new CigarElement(newLength, toTrim.getOperator());
    elements.set(elements.size()-1, replacement);
   
    return newLength;
  }
 
  private int padLeftmostElement(List<CigarElement> elements, int padLength) {
    CigarElement toPad = elements.get(0);
    int newLength = toPad.getLength() + padLength;
    CigarElement replacement = new CigarElement(newLength, toPad.getOperator());
    elements.set(0, replacement);
   
    return newLength;
  }
 
  private int getTotalLength(List<CigarElement> elements) {
    int total = 0;
   
    for (CigarElement element : elements) {
      if (element.getOperator() != CigarOperator.D) {
        total += element.getLength();
      }
    }
   
    return total;
  }
 
  private int getNonSoftClippedLength(List<CigarElement> elements) {
    int total = 0;
   
    for (CigarElement element : elements) {
      if ((element.getOperator() != CigarOperator.D) && (element.getOperator() != CigarOperator.S)) {
        total += element.getLength();
      }
    }
   
    return total;
  }
 
  private int getTotalLength(List<CigarElement> left, List<CigarElement> right) {
    return getTotalLength(left) + getTotalLength(right);
  }
 
  static class ReadComparator implements Comparator<SAMRecord> {

    @Override
    public int compare(SAMRecord read1, SAMRecord read2) {
      int cmp = getMappedReadStart(read1) - getMappedReadStart(read2);
     
      if (cmp == 0) {
        cmp = getMappedReadEnd(read1) - getMappedReadEnd(read2);
      }
           
      return cmp;
    }
  }
 
  private void sortReadsByPosition(List<SAMRecord> reads) {
    Collections.sort(reads, new ReadComparator());
  }

  private void outputRead(SAMRecord read, boolean isCombined, SAMFileWriter out) {
    try {
      System.out.println(read.getSAMString());
      out.addAlignment(read);
    } catch (NullPointerException e) {
      System.out.println("isCombined: " + isCombined);
      System.out.println(read);
      System.out.println(read.getReadName());
      System.out.println(read.getSAMString());
      throw e;
    }
  }
 
  private void pruneLikelyInserts(List<SAMRecord> sortedReads) {
    if (sortedReads.size() == 3) {
      SAMRecord first  = sortedReads.get(0);
      SAMRecord middle = sortedReads.get(1);
      SAMRecord last   = sortedReads.get(2);
     
//      if ((middle.getAlignmentStart() <= first.getAlignmentEnd()) && (middle.getAlignmentEnd() >= last.getAlignmentStart())) {
//        sortedReads.remove(middle);
//      }
     
      int FUDGE_FACTOR = 5;
     
      if ((getMappedReadStart(middle) <= getMappedReadEnd(first) + FUDGE_FACTOR) && (getMappedReadEnd(middle) >= getMappedReadStart(last) - FUDGE_FACTOR)) {
        sortedReads.remove(middle);
      }
    }
  }
 
  // 1 based read start
  private static int getMappedReadStart(SAMRecord read) {
   
    List<ReadBlock> blocks = ReadBlock.getReadBlocks(read);
   
    for (ReadBlock block : blocks) {
      if (block.getType() != CigarOperator.S) {
        return block.getReadStart();
      }
    }

    return -1;
  }
 
  // 1 based read start (inclusive)
  private static int getMappedReadEnd(SAMRecord read) {
    List<ReadBlock> blocks = ReadBlock.getReadBlocks(read);
   
    for (int i=blocks.size()-1; i>=0; i--) {
      ReadBlock block = blocks.get(i);
      if (block.getType() != CigarOperator.S) {
        return block.getReadStart() + block.getLength() - 1;
      }
    }
   
    return -1;
  }
 
  /*
  public static void main(String[] args) {
   
    CombineChimera3 cc3 = new CombineChimera3();
   
    String in;
    String out;
   

    in = "/home/lmose/dev/abra/chim_insert_bug/t2.bam";
    out = "/home/lmose/dev/abra/chim_insert_bug/t2_out.bam";
    cc3.combine(in, out, 33);

  }
  */
TOP

Related Classes of abra.CombineChimera3$ReadComparator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.