Package picard.illumina

Source Code of picard.illumina.CheckIlluminaDirectory

package picard.illumina;

import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProcessExecutor;
import htsjdk.samtools.util.StringUtil;
import picard.PicardException;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
import picard.cmdline.programgroups.Illumina;
import picard.cmdline.StandardOptionDefinitions;
import picard.illumina.parser.IlluminaDataProviderFactory;
import picard.illumina.parser.IlluminaDataType;
import picard.illumina.parser.IlluminaFileUtil;
import picard.illumina.parser.OutputMapping;
import picard.illumina.parser.ParameterizedFileUtil;
import picard.illumina.parser.ReadStructure;

import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

/**
* Program to check a lane of an Illumina output directory.  This program checks that files exist, are non-zero in length, for every tile/cycle and
* specified data type.  If NO data type is specified then the default data types used by IlluminaBasecallsToSam are used.
*/
@CommandLineProgramProperties(
        usage = "Check that the files to provide the data specified by DATA_TYPES are available, exist, and are reasonably sized for every tile/cycle.  " +
                "Reasonably sized means non-zero sized for files that exist per tile and equal size for binary files that exist per cycle/per tile. " +
                "CheckIlluminaDirectory DOES NOT check that the individual records in a file are well-formed.",
        usageShort = "Asserts the validity of the data in the specified Illumina basecalling data",
        programGroup = Illumina.class
)
public class CheckIlluminaDirectory extends CommandLineProgram {
    private static final Log log = Log.getInstance(CheckIlluminaDirectory.class);

    // The following attributes define the command-line arguments

    @Option(doc = "The basecalls output directory. ", shortName = "B")
    public File BASECALLS_DIR;

    @Option(doc = "The data types that should be checked for each tile/cycle.  If no values are provided then the data types checked are those " +
            "required by IlluminaBaseCallsToSam (which is a superset of those used in ExtractIlluminaBarcodes).  These data types vary slightly depending on " +
            "whether or not the run is barcoded so READ_STRUCTURE should be the same as that which will be passed to IlluminaBasecallsToSam.  If this option " +
            "is left unspecified then both ExtractIlluminaBarcodes and IlluminaBaseCallsToSam should complete successfully UNLESS the " +
            "individual records of the files themselves are spurious.",
            shortName = "DT", optional = true)
    public final Set<IlluminaDataType> DATA_TYPES = new TreeSet<IlluminaDataType>();

    @Option(doc = ReadStructure.PARAMETER_DOC + " Note:  If you want to check whether or not a future IlluminaBasecallsToSam or ExtractIlluminaBarcodes " +
            "run will fail then be sure to use the exact same READ_STRUCTURE that you would pass to these programs for this run.",
            shortName = "RS")
    public String READ_STRUCTURE;

    @Option(doc = "The number of the lane(s) to check. ", shortName = StandardOptionDefinitions.LANE_SHORT_NAME,
            minElements = 1)
    public List<Integer> LANES;

    @Option(doc = "The number(s) of the tile(s) to check. ", shortName = "T", optional = true)
    public List<Integer> TILE_NUMBERS;

    @Option(doc = "A flag to determine whether or not to create fake versions of the missing files.", shortName = "F",
            optional = true)
    public Boolean FAKE_FILES = false;

    @Option(doc = "A flag to create symlinks to the loc file for the X Ten for each tile.", shortName = "X",
            optional = true)
    public Boolean LINK_LOCS = false;

    /**
     * Required main method implementation.
     */
    public static void main(final String[] argv) {
        new CheckIlluminaDirectory().instanceMainWithExit(argv);
    }

    @Override
    protected int doWork() {
        final ReadStructure readStructure = new ReadStructure(READ_STRUCTURE);
        if (DATA_TYPES.isEmpty()) {
            DATA_TYPES.addAll(Arrays.asList(IlluminaBasecallsConverter.DATA_TYPES_NO_BARCODE));
        }

        final List<Integer> failingLanes = new ArrayList<Integer>();
        int totalFailures = 0;

        final int[] expectedCycles = new OutputMapping(readStructure).getOutputCycles();
        log.info("Checking lanes(" + StringUtil.join(",", LANES) + " in basecalls directory (" + BASECALLS_DIR
                .getAbsolutePath() + ")\n");
        log.info("Expected cycles: " + StringUtil.intValuesToString(expectedCycles));

        for (final Integer lane : LANES) {
            IlluminaFileUtil fileUtil = new IlluminaFileUtil(BASECALLS_DIR, lane);
            final List<Integer> expectedTiles = fileUtil.getExpectedTiles();
            if (!TILE_NUMBERS.isEmpty()) {
                expectedTiles.retainAll(TILE_NUMBERS);
            }

            if (LINK_LOCS) {
                createLocFileSymlinks(fileUtil, lane);
                //we need to create a new file util because it stores a cache to the files it found on
                //construction and this doesn't inclue the recently created symlinks
                fileUtil = new IlluminaFileUtil(BASECALLS_DIR, lane);
            }

            log.info("Checking lane " + lane);
            log.info("Expected tiles: " + StringUtil.join(", ", expectedTiles));

            final int numFailures = verifyLane(fileUtil, expectedTiles, expectedCycles, DATA_TYPES, FAKE_FILES);

            if (numFailures > 0) {
                log.info("Lane " + lane + " FAILED " + " Total Errors: " + numFailures);
                failingLanes.add(lane);
                totalFailures += numFailures;
            } else {
                log.info("Lane " + lane + " SUCCEEDED ");
            }
        }

        int status = 0;
        if (totalFailures == 0) {
            log.info("SUCCEEDED!  All required files are present and non-empty.");
        } else {
            status = totalFailures;
            log.info("FAILED! There were " + totalFailures + " in the following lanes: " + StringUtil
                    .join(", ", failingLanes));
        }

        return status;
    }

    private void createLocFileSymlinks(final IlluminaFileUtil fileUtil, final int lane) {
        final File baseFile = new File(BASECALLS_DIR.getParentFile().getAbsolutePath() + File.separator + "s.locs");
        final File newFileBase = new File(baseFile.getParent() + File.separator + IlluminaFileUtil
                .longLaneStr(lane) + File.separator);
        if (baseFile.exists()) {
            boolean success = true;
            if (!newFileBase.exists()) {
                success = newFileBase.mkdirs();
            }
            if (success) {
                for (final Integer tile : fileUtil.getExpectedTiles()) {
                    final String newName =
                            newFileBase + File.separator + String.format("s_%d_%d.locs", lane, tile);
                    final ProcessExecutor.ExitStatusAndOutput output =
                            ProcessExecutor.executeAndReturnInterleavedOutput(new String[]{"ln", "-fs", baseFile.getAbsolutePath(), newName});
                    if (output.exitStatus != 0) {
                        throw new PicardException("Could not create symlink: " + output.stdout);
                    }
                }
            } else {
                throw new PicardException(String.format("Could not create lane directory: %s.", newFileBase.getAbsolutePath()));
            }
        } else {
            throw new PicardException(String.format("Locations file %s does not exist.", baseFile.getAbsolutePath()));
        }

    }

    /**
     * Use fileUtil to find the data types that would be used by IlluminaDataProvider.  Verify that for the expected
     * tiles/cycles/data types that all the files needed to provide their data is present.  This method logs every
     * error that is found (excluding file faking errors) and returns the number of errors found
     *
     * @param fileUtil      A file util paramterized with the directory/lane to check
     * @param expectedTiles The tiles we expect to be available/well-formed
     * @param cycles        The cycles we expect to be available/well-formed
     * @param dataTypes     The data types we expect to be available/well-formed
     * @return The number of errors found/logged for this directory/lane
     */
    private static final int verifyLane(final IlluminaFileUtil fileUtil, final List<Integer> expectedTiles,
                                        final int[] cycles,
                                        final Set<IlluminaDataType> dataTypes, final boolean fakeFiles) {
        if (expectedTiles.isEmpty()) {
            throw new PicardException(
                    "0 input tiles were specified!  Check to make sure this lane is in the InterOp file!");
        }

        if (cycles.length == 0) {
            throw new PicardException("0 output cycles were specified!");
        }

        int numFailures = 0;

        //find what request IlluminaDataTypes we have files for and select the most preferred file format available for that type
        final Map<IlluminaFileUtil.SupportedIlluminaFormat, Set<IlluminaDataType>> formatToDataTypes =
                IlluminaDataProviderFactory.determineFormats(dataTypes, fileUtil);

        //find if we have any IlluminaDataType with NO available file formats and, if any exist, increase the error count
        final Set<IlluminaDataType> unmatchedDataTypes =
                IlluminaDataProviderFactory.findUnmatchedTypes(dataTypes, formatToDataTypes);
        if (!unmatchedDataTypes.isEmpty()) {
            if (fakeFiles) {
                for (final IlluminaDataType dataType : unmatchedDataTypes) {
                    final IlluminaFileUtil.SupportedIlluminaFormat format =
                            IlluminaDataProviderFactory.findPreferredFormat(dataType, fileUtil);
                    fileUtil.getUtil(format).fakeFiles(expectedTiles, cycles, format);

                }
            }
            log.info("Could not find a format with available files for the following data types: " + StringUtil
                    .join(", ", new ArrayList<IlluminaDataType>(unmatchedDataTypes)));
            numFailures += unmatchedDataTypes.size();
        }

        for (final IlluminaFileUtil.SupportedIlluminaFormat format : formatToDataTypes.keySet()) {
            final ParameterizedFileUtil util = fileUtil.getUtil(format);
            final List<String> failures = util.verify(expectedTiles, cycles);
            //if we have failures and we want to fake files then fake them now.
            if (!failures.isEmpty() && fakeFiles) {
                //fake files
                util.fakeFiles(expectedTiles, cycles, format);

            }
            numFailures += failures.size();
            for (final String failure : failures) {
                log.info(failure);
            }
        }

        return numFailures;
    }

    @Override
    protected String[] customCommandLineValidation() {
        IOUtil.assertDirectoryIsReadable(BASECALLS_DIR);
        final List<String> errors = new ArrayList<String>();

        for (final Integer lane : LANES) {
            if (lane < 1) {
                errors.add(
                        "LANES must be greater than or equal to 1.  LANES passed in " + StringUtil.join(", ", LANES));
                break;
            }
        }

        if (errors.isEmpty()) {
            return null;
        } else {
            return errors.toArray(new String[errors.size()]);
        }
    }
}
TOP

Related Classes of picard.illumina.CheckIlluminaDirectory

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.