Package htsjdk.samtools.util

Examples of htsjdk.samtools.util.ProgressLogger


        }

        createSamFileWriter(header);

        log.info("Traversing query name sorted records and fixing up mate pair information.");
        final ProgressLogger progress = new ProgressLogger(log);
        while (iterator.hasNext()) {
            final SAMRecord record = iterator.next();
            out.addAlignment(record);
            progress.record(record);
        }
        iterator.close();

        if (header.getSortOrder() == SortOrder.queryname) {
            log.info("Closing output file.");
View Full Code Here


        if  (CREATE_INDEX && writer.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate){
            throw new PicardException("Can't CREATE_INDEX unless sort order is coordinate");
        }

        final ProgressLogger progress = new ProgressLogger(Log.getInstance(SamFormatConverter.class));
        for (final SAMRecord rec : reader) {
            writer.addAlignment(rec);
            progress.record(rec);
        }
        reader.close();
        writer.close();
        return 0;
    }
View Full Code Here

        final Map<String, SAMRecord> firstSeenMates = new HashMap<String, SAMRecord>();
        final FastqWriterFactory factory = new FastqWriterFactory();
        factory.setCreateMd5(CREATE_MD5_FILE);
        final Map<SAMReadGroupRecord, FastqWriters> writers = generateWriters(reader.getFileHeader().getReadGroups(), factory);

        final ProgressLogger progress = new ProgressLogger(log);
        for (final SAMRecord currentRecord : reader) {
            if (currentRecord.isSecondaryOrSupplementary() && !INCLUDE_NON_PRIMARY_ALIGNMENTS)
                continue;

            // Skip non-PF reads as necessary
            if (currentRecord.getReadFailsVendorQualityCheckFlag() && !INCLUDE_NON_PF_READS)
                continue;

            final FastqWriters fq = writers.get(currentRecord.getReadGroup());
            if (currentRecord.getReadPairedFlag()) {
                final String currentReadName = currentRecord.getReadName();
                final SAMRecord firstRecord = firstSeenMates.remove(currentReadName);
                if (firstRecord == null) {
                    firstSeenMates.put(currentReadName, currentRecord);
                } else {
                    assertPairedMates(firstRecord, currentRecord);

                    final SAMRecord read1 =
                            currentRecord.getFirstOfPairFlag() ? currentRecord : firstRecord;
                    final SAMRecord read2 =
                            currentRecord.getFirstOfPairFlag() ? firstRecord : currentRecord;
                    writeRecord(read1, 1, fq.getFirstOfPair(), READ1_TRIM, READ1_MAX_BASES_TO_WRITE);
                    final FastqWriter secondOfPairWriter = fq.getSecondOfPair();
                    if (secondOfPairWriter == null) {
                        throw new PicardException("Input contains paired reads but no SECOND_END_FASTQ specified.");
                    }
                    writeRecord(read2, 2, secondOfPairWriter, READ2_TRIM, READ2_MAX_BASES_TO_WRITE);
                }
            } else {
                writeRecord(currentRecord, null, fq.getUnpaired(), READ1_TRIM, READ1_MAX_BASES_TO_WRITE);
            }

            progress.record(currentRecord);
        }

        reader.close();

        // Close all the fastq writers being careful to close each one only once!
View Full Code Here

            OUTPUT.getName() + " [sortorder=" + outputHeader.getSortOrder().name() + "]");

        // create OUTPUT file
        final SAMFileWriter outputWriter = new SAMFileWriterFactory().makeSAMOrBAMWriter(outputHeader, presorted, OUTPUT);

        final ProgressLogger progress = new ProgressLogger(log, (int) 1e6, "Written");
       
        while (filteringIterator.hasNext()) {
            final SAMRecord rec = filteringIterator.next();
            outputWriter.addAlignment(rec);
            progress.record(rec);
        }

        filteringIterator.close();
        outputWriter.close();
        inputReader.close();
        log.info(new DecimalFormat("#,###").format(progress.getCount()) + " SAMRecords written to " + OUTPUT.getName());
    }
View Full Code Here

                                      final File outputFile) {
        final EnumSet<Options> options = EnumSet.copyOf(VariantContextWriterFactory.DEFAULT_OPTIONS);
        if (createIndex) options.add(Options.INDEX_ON_THE_FLY); else options.remove(Options.INDEX_ON_THE_FLY);
        final VariantContextWriter out = VariantContextWriterFactory.create(outputFile, sequenceDictionary, options);

        final ProgressLogger progress = new ProgressLogger(log, 10000);
        VariantContext lastContext = null;
        File lastFile = null;
        VCFHeader firstHeader = null;
        VariantContextComparator comparator = null;

        for (final File f : inputFiles) {
            log.debug("Gathering from file: ", f.getAbsolutePath());
            final VCFFileReader variantReader = new VCFFileReader(f, false);
            final PeekableIterator<VariantContext> variantIterator = new PeekableIterator<VariantContext>(variantReader.iterator());
            final VCFHeader header = variantReader.getFileHeader();

            if (firstHeader == null) {
                firstHeader = header;
                out.writeHeader(firstHeader);
                comparator = new VariantContextComparator(firstHeader.getContigLines());
            }

            if (lastContext != null && variantIterator.hasNext()) {
                final VariantContext vc = variantIterator.peek();
                if (comparator.compare(vc, lastContext) <= 0) {
                    throw new IllegalStateException("First variant in file " + f.getAbsolutePath() + " is at " + vc.getSource() +
                            " but last variant in earlier file " + lastFile.getAbsolutePath() + " is at " + lastContext.getSource());
                }
            }

            while (variantIterator.hasNext()) {
                lastContext = variantIterator.next();
                out.add(lastContext);
                progress.record(lastContext.getChr(), lastContext.getStart());
            }

            lastFile = f;

            CloserUtil.close(variantIterator);
View Full Code Here

      if (CREATE_INDEX && sequenceDictionary == null) {
        throw new PicardException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
      }

        final ProgressLogger progress = new ProgressLogger(Log.getInstance(MakeSitesOnlyVcf.class), 10000);

        // Setup the site-only file writer
        final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
                .setOutputFile(OUTPUT)
                .setReferenceDictionary(sequenceDictionary);
        if (CREATE_INDEX)
            builder.setOption(Options.INDEX_ON_THE_FLY);
        else
            builder.unsetOption(Options.INDEX_ON_THE_FLY);
        final VariantContextWriter writer = builder.build();

        final VCFHeader header = new VCFHeader(inputVcfHeader.getMetaDataInInputOrder(), SAMPLE);
        writer.writeHeader(header);

        // Go through the input, strip the records and write them to the output
        final CloseableIterator<VariantContext> iterator = reader.iterator();
      while (iterator.hasNext()) {
        final VariantContext full = iterator.next();
            final VariantContext site = subsetToSamplesWithOriginalAnnotations(full, SAMPLE);
            writer.add(site);
            progress.record(site.getChr(), site.getStart());
        }

      CloserUtil.close(iterator);
      CloserUtil.close(reader);
      writer.close();
View Full Code Here

  }

  @Override
  protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    final ProgressLogger progress = new ProgressLogger(log, 10000);

    final VCFFileReader fileReader = new VCFFileReader(INPUT);
    final VCFHeader fileHeader = fileReader.getFileHeader();

    final SAMSequenceDictionary sequenceDictionary =
        SEQUENCE_DICTIONARY != null
            ? SAMFileReader.getSequenceDictionary(SEQUENCE_DICTIONARY)
            : fileHeader.getSequenceDictionary();
    if (CREATE_INDEX && sequenceDictionary == null) {
      throw new PicardException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
    }

        final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
                .setReferenceDictionary(sequenceDictionary)
                .clearOptions();
        if (CREATE_INDEX)
            builder.setOption(Options.INDEX_ON_THE_FLY);

    final VariantContextWriter snpWriter = builder.setOutputFile(SNP_OUTPUT).build();
    final VariantContextWriter indelWriter = builder.setOutputFile(INDEL_OUTPUT).build();
    snpWriter.writeHeader(fileHeader);
    indelWriter.writeHeader(fileHeader);

        int incorrectVariantCount = 0;

    final CloseableIterator<VariantContext> iterator = fileReader.iterator();
    while (iterator.hasNext()) {
      final VariantContext context = iterator.next();
      if (context.isIndel()) indelWriter.add(context);
      else if (context.isSNP()) snpWriter.add(context);
      else {
                if (STRICT) throw new IllegalStateException("Found a record with type " + context.getType().name());
                else incorrectVariantCount++;
            }

            progress.record(context.getChr(), context.getStart());
    }

        if (incorrectVariantCount > 0) {
            log.debug("Found " + incorrectVariantCount + " records that didn't match SNP or INDEL");
        }
View Full Code Here

     *
     * @param readers - a list of VCFFileReaders, one for each input VCF
     * @param outputHeader - The merged header whose information we intend to use in the final output file
     */
    private SortingCollection<VariantContext> sortInputs(final List<VCFFileReader> readers, final VCFHeader outputHeader) {
        final ProgressLogger readProgress = new ProgressLogger(log, 25000, "read", "records");

        // NB: The default MAX_RECORDS_IN_RAM may not be appropriate here. VariantContexts are smaller than SamRecords
        // We would have to play around empirically to find an appropriate value. We are not performing this optimization at this time.
        final SortingCollection<VariantContext> sorter =
                SortingCollection.newInstance(
                        VariantContext.class,
                        new VCFRecordCodec(outputHeader),
                        outputHeader.getVCFRecordComparator(),
                        MAX_RECORDS_IN_RAM,
                        TMP_DIR);
        int readerCount = 1;
        for (final VCFFileReader reader : readers) {
            log.info("Reading entries from input file " + readerCount);
            for (final VariantContext variantContext : reader) {
                sorter.add(variantContext);
                readProgress.record(variantContext.getChr(), variantContext.getStart());
            }
            reader.close();
            readerCount++;
        }
        return sorter;
View Full Code Here

        }
        return sorter;
    }

    private void writeSortedOutput(final VCFHeader outputHeader, final SortingCollection<VariantContext> sortedOutput) {
        final ProgressLogger writeProgress = new ProgressLogger(log, 25000, "wrote", "records");
        final EnumSet<Options> options = CREATE_INDEX ? EnumSet.of(Options.INDEX_ON_THE_FLY) : EnumSet.noneOf(Options.class);
        final VariantContextWriter out = new VariantContextWriterBuilder().
                setReferenceDictionary(outputHeader.getSequenceDictionary()).
                setOptions(options).
                setOutputFile(OUTPUT).build();
        out.writeHeader(outputHeader);
        for (final VariantContext variantContext : sortedOutput) {
            out.add(variantContext);
            writeProgress.record(variantContext.getChr(), variantContext.getStart());
        }
        out.close();
    }
View Full Code Here

    this.CREATE_INDEX = true;
  }

  @Override
  protected int doWork() {
    final ProgressLogger progress = new ProgressLogger(log, 10000);
    final List<String> sampleList = new ArrayList<String>();
    final Collection<CloseableIterator<VariantContext>> iteratorCollection = new ArrayList<CloseableIterator<VariantContext>>(INPUT.size());
    final Collection<VCFHeader> headers = new HashSet<VCFHeader>(INPUT.size());

    VariantContextComparator variantContextComparator = null;
    SAMSequenceDictionary sequenceDictionary = null;

    if (SEQUENCE_DICTIONARY != null) sequenceDictionary = SAMFileReader.getSequenceDictionary(SEQUENCE_DICTIONARY);

    for (final File file : INPUT) {
      IOUtil.assertFileIsReadable(file);
      final VCFFileReader fileReader = new VCFFileReader(file, false);
      final VCFHeader fileHeader = fileReader.getFileHeader();

      if (variantContextComparator == null) {
        variantContextComparator = fileHeader.getVCFRecordComparator();
      } else {
        if ( ! variantContextComparator.isCompatible(fileHeader.getContigLines())) {
          throw new IllegalArgumentException(
              "The contig entries in input file " + file.getAbsolutePath() + " are not compatible with the others.");
        }
      }

      if (sequenceDictionary == null) sequenceDictionary = fileHeader.getSequenceDictionary();

      if (sampleList.isEmpty()) {
        sampleList.addAll(fileHeader.getSampleNamesInOrder());
      } else {
        if ( ! sampleList.equals(fileHeader.getSampleNamesInOrder())) {
          throw new IllegalArgumentException("Input file " + file.getAbsolutePath() + " has sample entries that don't match the other files.");
        }
      }

      headers.add(fileHeader);
      iteratorCollection.add(fileReader.iterator());
    }

    if (CREATE_INDEX && sequenceDictionary == null) {
      throw new PicardException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
    }

        final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
                .setOutputFile(OUTPUT)
                .setReferenceDictionary(sequenceDictionary)
                .clearOptions();
        if (CREATE_INDEX)
            builder.setOption(Options.INDEX_ON_THE_FLY);
        final VariantContextWriter writer = builder.build();

    writer.writeHeader(new VCFHeader(VCFUtils.smartMergeHeaders(headers, false), sampleList));

    final MergingIterator<VariantContext> mergingIterator = new MergingIterator<VariantContext>(variantContextComparator, iteratorCollection);
    while (mergingIterator.hasNext()) {
      final VariantContext context = mergingIterator.next();
      writer.add(context);
      progress.record(context.getChr(), context.getStart());
    }

    CloserUtil.close(mergingIterator);
    writer.close();
    return 0;
View Full Code Here

TOP

Related Classes of htsjdk.samtools.util.ProgressLogger

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.