Package net.sf.cram

Source Code of net.sf.cram.Cram2BamRecordFactory

package net.sf.cram;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;

import net.sf.cram.encoding.read_features.Deletion;
import net.sf.cram.encoding.read_features.HardClip;
import net.sf.cram.encoding.read_features.InsertBase;
import net.sf.cram.encoding.read_features.Insertion;
import net.sf.cram.encoding.read_features.Padding;
import net.sf.cram.encoding.read_features.ReadBase;
import net.sf.cram.encoding.read_features.ReadFeature;
import net.sf.cram.encoding.read_features.RefSkip;
import net.sf.cram.encoding.read_features.SoftClip;
import net.sf.cram.encoding.read_features.Substitution;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;

public class Cram2BamRecordFactory {

  private SAMFileHeader header;

  public Cram2BamRecordFactory(SAMFileHeader header) {
    this.header = header;
  }

  public SAMRecord create(CramRecord cramRecord) {
    SAMRecord samRecord = new SAMRecord(header);

    samRecord.setReadName(cramRecord.getReadName());
    copyFlags(cramRecord, samRecord);

    if (cramRecord.sequenceId == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
      samRecord.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
      samRecord.setMappingQuality(SAMRecord.NO_MAPPING_QUALITY);
      samRecord.setReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
    } else {
      samRecord.setReferenceIndex(cramRecord.sequenceId);
      samRecord.setAlignmentStart(cramRecord.getAlignmentStart());
      samRecord.setMappingQuality(cramRecord.getMappingQuality());
    }

    if (cramRecord.segmentUnmapped)
      samRecord.setCigarString(SAMRecord.NO_ALIGNMENT_CIGAR);
    else
      samRecord.setCigar(getCigar2(cramRecord.getReadFeatures(),
          cramRecord.getReadLength()));

    if (samRecord.getReadPairedFlag()) {
      samRecord.setMateReferenceIndex(cramRecord.mateSequnceID);
      samRecord
          .setMateAlignmentStart(cramRecord.mateSequnceID == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX ? SAMRecord.NO_ALIGNMENT_START
              : cramRecord.mateAlignmentStart);
      samRecord.setMateNegativeStrandFlag(cramRecord.mateNegativeStrand);
      samRecord.setMateUnmappedFlag(cramRecord.mateUmapped);
    } else {
      samRecord
          .setMateReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
      samRecord.setMateAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
    }

    samRecord.setInferredInsertSize(cramRecord.templateSize);
    samRecord.setReadBases(cramRecord.getReadBases());
    samRecord.setBaseQualities(cramRecord.getQualityScores());

    if (cramRecord.tags != null)
      for (ReadTag tag : cramRecord.tags)
        samRecord.setAttribute(tag.getKey(), tag.getValue());

    if (cramRecord.getReadGroupID() > -1) {
//    if (cramRecord.getReadGroupID() < header.getReadGroups().size()) {
      SAMReadGroupRecord readGroupRecord = header.getReadGroups().get(
          cramRecord.getReadGroupID());
      samRecord.setAttribute("RG", readGroupRecord.getId());
    }

    return samRecord;
  }

  private static final void copyFlags(CramRecord cr, SAMRecord sr) {
    sr.setReadPairedFlag(cr.multiFragment);
    sr.setProperPairFlag(cr.properPair);
    sr.setReadUnmappedFlag(cr.segmentUnmapped);
    sr.setReadNegativeStrandFlag(cr.negativeStrand);
    sr.setFirstOfPairFlag(cr.firstSegment);
    sr.setSecondOfPairFlag(cr.lastSegment);
    sr.setNotPrimaryAlignmentFlag(cr.secondaryALignment);
    sr.setReadFailsVendorQualityCheckFlag(cr.vendorFiltered);
    sr.setDuplicateReadFlag(cr.duplicate);
  }

  private static final Cigar getCigar2(Collection<ReadFeature> features,
      int readLength) {
    if (features == null || features.isEmpty()) {
      CigarElement ce = new CigarElement(readLength, CigarOperator.M);
      return new Cigar(Arrays.asList(ce));
    }

    List<CigarElement> list = new ArrayList<CigarElement>();
    int totalOpLen = 1;
    CigarElement ce;
    CigarOperator lastOperator = CigarOperator.MATCH_OR_MISMATCH;
    int lastOpLen = 0;
    int lastOpPos = 1;
    CigarOperator co = null;
    int rfLen = 0;
    for (ReadFeature f : features) {

      int gap = f.getPosition() - (lastOpPos + lastOpLen);
      if (gap > 0) {
        if (lastOperator != CigarOperator.MATCH_OR_MISMATCH) {
          list.add(new CigarElement(lastOpLen, lastOperator));
          lastOpPos += lastOpLen;
          totalOpLen += lastOpLen;
          lastOpLen = gap;
        } else {
          lastOpLen += gap;
        }

        lastOperator = CigarOperator.MATCH_OR_MISMATCH;
      }

      switch (f.getOperator()) {
      case Insertion.operator:
        co = CigarOperator.INSERTION;
        rfLen = ((Insertion) f).getSequence().length;
        break;
      case SoftClip.operator:
        co = CigarOperator.SOFT_CLIP;
        rfLen = ((SoftClip) f).getSequence().length;
        break;
      case HardClip.operator:
        co = CigarOperator.HARD_CLIP ;
        rfLen = ((HardClip) f).getSequence().length;
        break;
      case InsertBase.operator:
        co = CigarOperator.INSERTION;
        rfLen = 1;
        break;
      case Deletion.operator:
        co = CigarOperator.DELETION;
        rfLen = ((Deletion) f).getLength();
        break;
      case RefSkip.operator:
        co = CigarOperator.SKIPPED_REGION;
        rfLen = ((RefSkip) f).getLength();
        break;
      case Padding.operator:
        co = CigarOperator.PADDING ;
        rfLen = ((Padding) f).getLength();
        break;
      case Substitution.operator:
      case ReadBase.operator:
        co = CigarOperator.MATCH_OR_MISMATCH;
        rfLen = 1;
        break;
      default:
        continue;
      }

      if (lastOperator != co) {
        // add last feature
        if (lastOpLen > 0) {
          list.add(new CigarElement(lastOpLen, lastOperator));
          totalOpLen += lastOpLen;
        }
        lastOperator = co;
        lastOpLen = rfLen;
        lastOpPos = f.getPosition();
      } else
        lastOpLen += rfLen;

      if (!co.consumesReadBases())
        lastOpPos -= rfLen;
    }

    if (lastOperator != null) {
      if (lastOperator != CigarOperator.M) {
        list.add(new CigarElement(lastOpLen, lastOperator));
        if (readLength >= lastOpPos + lastOpLen) {
          ce = new CigarElement(readLength - (lastOpLen + lastOpPos)
              + 1, CigarOperator.M);
          list.add(ce);
        }
      } else if (readLength > lastOpPos - 1) {
        ce = new CigarElement(readLength - lastOpPos + 1,
            CigarOperator.M);
        list.add(ce);
      }
    }

    if (list.isEmpty()) {
      ce = new CigarElement(readLength, CigarOperator.M);
      return new Cigar(Arrays.asList(ce));
    }

    return new Cigar(list);
  }
}
TOP

Related Classes of net.sf.cram.Cram2BamRecordFactory

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.