Package net.sf.cram

Source Code of net.sf.cram.CramNormalizer

package net.sf.cram;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import net.sf.cram.encoding.read_features.BaseQualityScore;
import net.sf.cram.encoding.read_features.Deletion;
import net.sf.cram.encoding.read_features.InsertBase;
import net.sf.cram.encoding.read_features.Insertion;
import net.sf.cram.encoding.read_features.ReadBase;
import net.sf.cram.encoding.read_features.ReadFeature;
import net.sf.cram.encoding.read_features.SoftClip;
import net.sf.cram.encoding.read_features.Substitution;
import net.sf.cram.structure.SubstitutionMatrix;
import net.sf.picard.util.Log;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;

public class CramNormalizer {
  private SAMFileHeader header;
  private int readCounter = 0;
  private String readNamePrefix = "";
  private byte defaultQualityScore = '?' - '!';

  private static Log log = Log.getInstance(CramNormalizer.class);

  public CramNormalizer(SAMFileHeader header) {
    this.header = header;
  }

  public void normalize(ArrayList<CramRecord> records, boolean resetPairing,
      byte[] ref, int alignmentStart,
      SubstitutionMatrix substitutionMatrix, boolean AP_delta) {

    int startCounter = readCounter;

    for (CramRecord r : records) {
      r.index = ++readCounter;

      if (r.sequenceId == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
        r.setSequenceName(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME);
        r.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
      } else {
        r.setSequenceName(header.getSequence(r.sequenceId)
            .getSequenceName());
      }
    }

    {// restore pairing first:
      for (CramRecord r : records) {
        if (!r.multiFragment || r.detached) {
          r.recordsToNextFragment = -1;

          r.next = null;
          r.previous = null;
          continue;
        }
        if (r.hasMateDownStream) {
          CramRecord downMate = records.get(r.index
              + r.recordsToNextFragment - startCounter);
          r.next = downMate;
          downMate.previous = r;

          r.mateAlignmentStart = downMate.getAlignmentStart();
          r.mateUmapped = downMate.segmentUnmapped;
          r.mateNegativeStrand = downMate.negativeStrand;
          r.mateSequnceID = downMate.sequenceId;
          if (r.mateSequnceID == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX)
            r.mateAlignmentStart = SAMRecord.NO_ALIGNMENT_START;

          downMate.mateAlignmentStart = r.getAlignmentStart();
          downMate.mateUmapped = r.segmentUnmapped;
          downMate.mateNegativeStrand = r.negativeStrand;
          downMate.mateSequnceID = r.sequenceId;
          if (downMate.mateSequnceID == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX)
            downMate.mateAlignmentStart = SAMRecord.NO_ALIGNMENT_START;

          if (r.firstSegment && downMate.lastSegment) {
            r.templateSize = Utils.computeInsertSize(r, downMate);
            downMate.templateSize = -r.templateSize;
          } else if (r.lastSegment && downMate.firstSegment) {
            downMate.templateSize = Utils.computeInsertSize(
                downMate, r);
            r.templateSize = -downMate.templateSize;
          }
        }
      }
    }

    // assign some read names if needed:
    for (CramRecord r : records) {
      if (r.getReadName() == null) {
        String name = readNamePrefix + r.index;
        r.setReadName(name);
        if (r.next != null)
          r.next.setReadName(name);
        if (r.previous != null)
          r.previous.setReadName(name);
      }
    }

    // resolve bases:
    for (CramRecord r : records) {
      if (r.segmentUnmapped)
        continue;

      byte[] bases = restoreReadBases(r, ref, substitutionMatrix);
      r.setReadBases(bases);
    }

    // restore quality scores:
    for (CramRecord r : records) {
      if (!r.forcePreserveQualityScores) {
        byte[] scores = new byte[r.getReadLength()];
        Arrays.fill(scores, defaultQualityScore);
        if (r.getReadFeatures() != null)
          for (ReadFeature f : r.getReadFeatures()) {
            if (f.getOperator() == BaseQualityScore.operator) {
              int pos = f.getPosition();
              byte q = ((BaseQualityScore) f).getQualityScore();

              try {
                scores[pos - 1] = q;
              } catch (ArrayIndexOutOfBoundsException e) {
                System.err.println("PROBLEM CAUSED BY:");
                System.err.println(r.toString());
                throw e;
              }
            }

          }

        r.setQualityScores(scores);
      } else {
        byte[] scores = r.getQualityScores();
        for (int i = 0; i < scores.length; i++)
          if (scores[i] == -1)
            scores[i] = defaultQualityScore;
      }
    }
  }

  public void restoreQualityScores(byte defaultQualityScore,
      List<CramRecord> records) {

  }

  private static final long calcRefLength(CramRecord record) {
    if (record.getReadFeatures() == null
        || record.getReadFeatures().isEmpty())
      return record.getReadLength();
    long len = record.getReadLength();
    for (ReadFeature rf : record.getReadFeatures()) {
      switch (rf.getOperator()) {
      case Deletion.operator:
        len += ((Deletion) rf).getLength();
        break;
      case Insertion.operator:
        len -= ((Insertion) rf).getSequence().length;
        break;
      default:
        break;
      }
    }

    return len;
  }

  private static final byte[] restoreReadBases(CramRecord record, byte[] ref,
      SubstitutionMatrix substitutionMatrix) {
    int readLength = (int) record.getReadLength();
    byte[] bases = new byte[readLength];

    int posInRead = 1;
    int alignmentStart = record.getAlignmentStart() - 1;

    int posInSeq = 0;
    if (record.getReadFeatures() == null
        || record.getReadFeatures().isEmpty()) {
      if (ref.length < alignmentStart + bases.length) {
        Arrays.fill(bases, (byte) 'N');
        System.arraycopy(ref, alignmentStart, bases, 0,
            Math.min(bases.length, ref.length - alignmentStart));
      } else
        System.arraycopy(ref, alignmentStart, bases, 0, bases.length);
      return bases;
    }
    List<ReadFeature> variations = record.getReadFeatures();
    for (ReadFeature v : variations) {
      for (; posInRead < v.getPosition(); posInRead++)
        bases[posInRead - 1] = ref[alignmentStart + posInSeq++];

      switch (v.getOperator()) {
      case Substitution.operator:
        Substitution sv = (Substitution) v;
        byte refBase = ref[alignmentStart + posInSeq];
        byte base = substitutionMatrix.base(refBase, sv.getCode());
        // switch (base) {
        // case 'A':
        // case 'C':
        // case 'G':
        // case 'T':
        // case 'N':
        // break;
        //
        // default:
        // throw new RuntimeException("Invalid base: " + base) ;
        // }
        sv.setBase(base);
        sv.setRefernceBase(refBase);
        bases[posInRead++ - 1] = base;
        posInSeq++;
        break;
      case Insertion.operator:
        Insertion iv = (Insertion) v;
        for (int i = 0; i < iv.getSequence().length; i++)
          bases[posInRead++ - 1] = iv.getSequence()[i];
        break;
      case SoftClip.operator:
        SoftClip sc = (SoftClip) v;
        for (int i = 0; i < sc.getSequence().length; i++)
          bases[posInRead++ - 1] = sc.getSequence()[i];
        break;
      case Deletion.operator:
        Deletion dv = (Deletion) v;
        posInSeq += dv.getLength();
        break;
      case InsertBase.operator:
        InsertBase ib = (InsertBase) v;
        bases[posInRead++ - 1] = ib.getBase();
        break;
      }
    }
    for (; posInRead <= readLength; posInRead++)
      bases[posInRead - 1] = ref[alignmentStart + posInSeq++];

    // ReadBase overwrites bases:
    for (ReadFeature v : variations) {
      switch (v.getOperator()) {
      case ReadBase.operator:
        ReadBase rb = (ReadBase) v;
        bases[v.getPosition() - 1] = rb.getBase();
        break;
      default:
        break;
      }
    }

    for (int i = 0; i < bases.length; i++) {
      switch (bases[i]) {
      case 'a':
        bases[i] = 'A';
        break;
      case 'c':
        bases[i] = 'C';
        break;
      case 'g':
        bases[i] = 'G';
        break;
      case 't':
        bases[i] = 'T';
        break;
      case 'n':
        bases[i] = 'N';
        break;

      default:
        break;
      }
    }

    return bases;
  }
}
TOP

Related Classes of net.sf.cram.CramNormalizer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.