Package abra

Source Code of abra.SAMRecordUtils

/* Copyright 2013 University of North Carolina at Chapel Hill.  All rights reserved. */
package abra;

import java.util.ArrayList;
import java.util.List;

import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;

/**
* Utility methods for dealing with SAMRecord
*
* @author Lisle E. Mose (lmose at unc dot edu)
*/
public class SAMRecordUtils {

  /**
   * Replace hard clips with soft clips.
   */
  public static void replaceHardClips(SAMRecord read) {
    Cigar cigar = read.getCigar();
   
    if (cigar.getCigarElements().size() > 0) {
      CigarElement firstElement = cigar.getCigarElement(0);
      CigarElement lastElement  = cigar.getCigarElement(cigar.numCigarElements()-1);
     
      if ((firstElement.getOperator() == CigarOperator.H) ||
        (lastElement.getOperator() == CigarOperator.H)) {
       
        Cigar newCigar = new Cigar();
       
        boolean isFirst = true;
       
        for (CigarElement element : cigar.getCigarElements()) {
          if (element.getOperator() != CigarOperator.H) {
            newCigar.add(element);
          } else {
            CigarElement softClip = new CigarElement(element.getLength(), CigarOperator.S);
            newCigar.add(softClip);
           
            if (isFirst) {
              read.setReadString(padBases(element.getLength()) + read.getReadString());
            } else {
              read.setReadString(read.getReadString() + padBases(element.getLength()));             
            }
          }
         
          isFirst = false;
        }
       
        read.setCigar(newCigar);
      }
    }
  }
 
  private static String padBases(int length) {
    StringBuffer buf = new StringBuffer(length);
    for (int i=0; i<length; i++) {
      buf.append('N');
    }
    return buf.toString();
  }
 
  /**
   * Remove leading or trailing soft clips from the input read.
   * Does not modify a read entirely comprised of soft clips.
   */
  public static void removeSoftClips(SAMRecord read) {
   
    Cigar cigar = read.getCigar();
   
    CigarElement firstElement = cigar.getCigarElement(0);
    CigarElement lastElement  = cigar.getCigarElement(cigar.numCigarElements()-1);
   
    if ((firstElement.getOperator() == CigarOperator.S) ||
      (lastElement.getOperator() == CigarOperator.S)) {
   
      Cigar newCigar = new Cigar();
     
      String bases = read.getReadString();
      //String qualities = read.getBaseQualityString();
         
      if (firstElement.getOperator() == CigarOperator.S) {
        bases = bases.substring(firstElement.getLength(), bases.length());
        //qualities = qualities.substring(firstElement.getLength(), qualities.length()-1);
      } else {
        newCigar.add(firstElement);
      }
     
      for (int i=1; i<cigar.numCigarElements()-1; i++) {
        newCigar.add(cigar.getCigarElement(i));
      }
     
      if (lastElement.getOperator() == CigarOperator.S) {
        bases = bases.substring(0, bases.length() - lastElement.getLength());
        //qualities = qualities.substring(0, qualities.length() - lastElement.getLength() - 1);
      } else {
        newCigar.add(lastElement);
      }
     
      read.setCigar(newCigar);
      read.setReadString(bases);
      //read.setBaseQualityString(qualities);
    }
  }

  public static Cigar subset(Cigar cigar, int startIdx, int endIdx) {
   
    List<CigarElement> subElements = new ArrayList<CigarElement>();
   
    // Find first element and index into first element
    int len = 0;
    int elemIdx = -1;
    for (CigarElement elem : cigar.getCigarElements()) {

      // Treat deletions as zero length.
      int elemLength = elem.getOperator() == CigarOperator.D ? 0 : elem.getLength();
     
      if (elemIdx < 0) {
       
        // Find first element (Should never be a deletion)
        int elemStart = len;
        int elemStop  = len + elemLength;
       
        if ((startIdx >= elemStart) && (startIdx < elemStop)) {
          elemIdx = startIdx - elemStart;
          int elemLen = Math.min(elem.getLength()-elemIdx, endIdx-startIdx+1);
          CigarElement newElem = new CigarElement(elemLen, elem.getOperator());
          subElements.add(newElem);
        }
       
        len += elemLength;
       
      } else if ((len + elemLength) <= endIdx) {
        // Add this entire element
        subElements.add(elem);
        len += elemLength;
      } else if (len <= endIdx) {
        // Add part of last sub element (should never be a deletion)
        CigarElement newElem = new CigarElement(endIdx-len+1, elem.getOperator());
        subElements.add(newElem);
        break;
      } else {
        break;
      }
    }
   
    return new Cigar(subElements);
  }
 
  /**
   * Calculates edit distance for the input read.
   * If the input c2r is not null, compare to the actual reference.
   * If c2r is null, check the input NM tag.
   */
  public static int getEditDistance(SAMRecord read, CompareToReference2 c2r) {
   
    Integer distance = null;
   
    if (read.getReadUnmappedFlag()) {
      distance = read.getReadLength();
    } else if (c2r != null) {
      //distance = c2r.numMismatches(read) + getNumIndelBases(read);
      distance = c2r.numMismatches(read) + getNumIndelBases(read);
    } else {
      distance = read.getIntegerAttribute("NM");
     
      if (distance == null) {
        distance = read.getReadLength();
      }
    }
       
    return distance;
  }
   
  /**
   *  Returns total length of deletions and insertions for the input read.
   */
  public static int getNumIndelBases(SAMRecord read) {
    int numIndelBases = 0;
   
    for (CigarElement element : read.getCigar().getCigarElements()) {
      if ((element.getOperator() == CigarOperator.D) || (element.getOperator() == CigarOperator.I)) {
        numIndelBases += element.getLength();
      }
    }
   
    return numIndelBases;
  }
 
  /**
   *  Returns original edit distance as set in YX tag.
   */
  public static int getOrigEditDistance(SAMRecord read) {
   
    Integer distance = null;
   
    if (read.getReadUnmappedFlag()) {
      distance = read.getReadLength();
    } else {
      distance = read.getIntegerAttribute("YX");
     
      if (distance == null) {
        distance = read.getReadLength();
      }
    }
       
    return distance;
  }
 
  /**
   * Convenience method for retrieving int attribute
   */
  public static int getIntAttribute(SAMRecord read, String attribute) {
    Integer num = read.getIntegerAttribute(attribute);
   
    if (num == null) {
      return 0;
    } else {
      return num;
    }
  }
 
  /**
   * Return the number of insertions and deletions in a SAMRecord
   */
  public static int getNumIndels(SAMRecord read) {
    int numIndels = 0;
   
    for (CigarElement element : read.getCigar().getCigarElements()) {
      if ((element.getOperator() == CigarOperator.D) || (element.getOperator() == CigarOperator.I)) {
        numIndels += 1;
      }
    }
   
    return numIndels;
  }
 
  /**
   *  Returns true if the updatedRead is essentially the same as the origRead
   *  minus soft clipping.
   */
  public static boolean isSoftClipEquivalent(SAMRecord origRead, SAMRecord updatedRead) {
   
    boolean isEquivalent = false;
   
    if ((origRead.getCigarString().contains("S")) &&
      (origRead.getReferenceName().equals(updatedRead.getReferenceName())) &&
      (origRead.getReadNegativeStrandFlag() == updatedRead.getReadNegativeStrandFlag()) &&
      (origRead.getCigarLength() > 1)) {
     
      // Compare start positions
      int nonClippedOrigStart = origRead.getAlignmentStart();
      CigarElement firstElem = origRead.getCigar().getCigarElement(0);
      if (firstElem.getOperator() == CigarOperator.S) {
        nonClippedOrigStart -= firstElem.getLength();
      }
     
      if (nonClippedOrigStart == updatedRead.getAlignmentStart()) {
        // Compare cigars
        List<CigarElement> elems = new ArrayList<CigarElement>(origRead.getCigar().getCigarElements());
       
        CigarElement first = elems.get(0);
       
        // If first element is soft clipped, lengthen the second element
        if (first.getOperator() == CigarOperator.S) {
          CigarElement second = elems.get(1);
          CigarElement newElem = new CigarElement(first.getLength() + second.getLength(), second.getOperator());
          elems.set(1,  newElem);
        }
       
        CigarElement last = elems.get(elems.size()-1);
        if (last.getOperator() == CigarOperator.S) {
          CigarElement nextToLast = elems.get(elems.size()-2);
          CigarElement newElem = new CigarElement(last.getLength() + nextToLast.getLength(), nextToLast.getOperator());
          elems.set(elems.size()-2, newElem);
        }
       
        List<CigarElement> newElems = new ArrayList<CigarElement>();

        for (CigarElement elem : elems) {
          if (elem.getOperator() != CigarOperator.S) {
            newElems.add(elem);
          }
        }
       
        Cigar convertedCigar = new Cigar(newElems);
       
        if (convertedCigar.equals(updatedRead.getCigar())) {
          isEquivalent = true;
        }
      }
    }
   
    return isEquivalent;
  }
 
  /**
   * Returns a clone of the input read
   */
  public static SAMRecord cloneRead(SAMRecord read) {
    try {
      return (SAMRecord) read.clone();
    } catch (CloneNotSupportedException e) {
      // Infamous "this should never happen" comment here.
      e.printStackTrace();
      throw new RuntimeException(e);
    }
  }

  /**
   * Returns true if the input read should be filtered
   */
  public static boolean isFiltered(boolean isPairedEnd, SAMRecord read) {
    // Filter out single end reads when in paired end mode.
    return ((isPairedEnd) && (!read.getReadPairedFlag()));
  }
 
  /**
   * Returns true if the input read is primary.
   * i.e. Bit flag not secondary 0x200 or supplemental 0x800
   */
  public static boolean isPrimary(SAMRecord read) {
    return ((read.getFlags() & 0x800== 0) && (!read.getNotPrimaryAlignmentFlag());
  }

}
TOP

Related Classes of abra.SAMRecordUtils

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.