Package picard.illumina.parser

Source Code of picard.illumina.parser.BclParserTest

package picard.illumina.parser;

import htsjdk.samtools.util.IOUtil;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import picard.PicardException;
import picard.illumina.parser.readers.BclQualityEvaluationStrategy;

import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

import static picard.illumina.parser.BinTdUtil.*;

public class BclParserTest {
    public static final File TEST_DATA_DIR = new File("testdata/picard/illumina/25T8B25T/Data/Intensities/BaseCalls/L001");
    public static final File MULTI_TILE_DATA_DIR = new File("/seq/tng/jcarey/testdata/NextSeq/Data/Intensities/BaseCalls/L001");
    public static final String READ_STRUCTURE = "25T8B25T";
    public static final String READ_STRUCTURE_WSKIPS = "25S8B25S";
    public static final String READ_STRUCTURE_WSKIPS_PARTIAL = "10T5S10T8B5T20S";
    public static final String READ_STRUCTURE_WSKIPS_BAD = "10T5S10T2B4S2B5T20S";
    public static final int[] READ_LENGTHS = new int[]{25, 8, 25};
    public static final int LANE = 1;
    public static final IlluminaDataType DATA_TYPES[] = {IlluminaDataType.BaseCalls, IlluminaDataType.QualityScores};
    public static final int TILE_SIZES = 60;


    public static Integer[] boxArr(final int[] ints) {
        final Integer[] boxArr = new Integer[ints.length];
        for (int i = 0; i < boxArr.length; i++) {
            boxArr[i] = ints[i];
        }

        return boxArr;
    }

    @DataProvider(name = "tileMaps")
    public Object[][] getTileMaps() {
        return new Object[][]{
                //TILES, NUM_CLUSTER TO BE READ, SEEK AT THIS READ, INDEX TO TILETOSEEK, ORDERED TILE INDEX (FOR OUT OF ORDER TILES)
                {new int[]{1101, 1201, 2101}, 180, -1, -1, -1},
                {new int[]{1101, 2101, 1201}, 180, -1, -1, -1},
                {new int[]{2101, 1201}, 120, -1, -1, -1},
                {new int[]{1101, 2101}, 120, -1, -1, -1},
                {new int[]{1101}, 60, -1, -1, -1},

                //Cases with seeking
                {new int[]{1101, 1201, 2101}, 206, 25, 0, 0},
                {new int[]{1101, 2101, 1201}, 206, 25, 0, 0},
                {new int[]{2101, 1201}, 80, 19, 0, 1},
                {new int[]{1101, 2101}, 156, 35, 0, 0},
                {new int[]{1101}, 66, 5, 0, 0}
        };
    }

    @DataProvider(name = "multiTileMaps")
    public Object[][] getMultiTileMaps() {
        return new Object[][]{
                //TILES, NUM_CLUSTER TO BE READ, SEEK AT THIS READ, INDEX TO TILETOSEEK, ORDERED TILE INDEX (FOR OUT OF ORDER TILES)
                {new int[]{11101, 11102, 11103}, 341292, -1, -1, -1}
        };
    }


    public void compareClusterToBclData(final ClusterData cluster, final BclData bclData, final int clusterNum, final int countNum) {
        final byte[][] bases = bclData.getBases();
        final byte[][] qualities = bclData.getQualities();

        Assert.assertEquals(bases.length, cluster.getNumReads(), "At cluster num " + clusterNum);
        Assert.assertEquals(qualities.length, cluster.getNumReads(), "At cluster num " + clusterNum);

        final StringBuilder baseBuilder = new StringBuilder();
        final StringBuilder qualBuilder = new StringBuilder();
        final StringBuilder barcode = new StringBuilder();
        baseBuilder.append("new byte[]{");
        for (int i = 0; i < bases.length; i++) {
            final byte[] subBase = bases[i];
            final byte[] subQual = qualities[i];
            for (int j = 0; j < subBase.length; j++) {
                if ((char) subBase[j] == '.') {
                    baseBuilder.append('P');
                } else {
                    baseBuilder.append((char) subBase[j]);
                }
                baseBuilder.append(",");
                qualBuilder.append(subQual[j]);
                qualBuilder.append(",");
                if (i == 1) {
                    if ((char) subBase[j] == '.') {
                        barcode.append('P');
                    } else {
                        barcode.append((char) subBase[j]);
                    }
                }
            }
        }
        baseBuilder.deleteCharAt(baseBuilder.length() - 1);
        qualBuilder.deleteCharAt(qualBuilder.length() - 1);
        baseBuilder.append("},\n                                new byte[]{");
        baseBuilder.append(qualBuilder.toString());
        baseBuilder.append("},\n                                \"");
        baseBuilder.append(barcode.toString());
        baseBuilder.append("\"");

        for (int i = 0; i < cluster.getNumReads(); i++) {
            if (!Arrays.equals(bases[i], cluster.getRead(i).getBases())) {
                System.out.println(cluster.getLane() + " : " + cluster.getTile() + " : " + clusterNum);
                System.out.println(baseBuilder.toString());
            }
            Assert.assertEquals(bases[i], cluster.getRead(i).getBases(), " Bases differ for read " + i + " at cluster num " + clusterNum + " at cluster count " + countNum);
            Assert.assertEquals(qualities[i], cluster.getRead(i).getQualities(), " Qualities differ for read " + i + " at cluster num " + clusterNum + " at cluster count " + countNum);
        }
    }

    public void fullBclParserTestImpl(final File dir, final String readStructure, final int[] tiles, final int size, final int seekAfter, final int newTileIndex, final int orderedTileIndex,
                                      final boolean multiTile) {
        final ReadStructure rs = new ReadStructure(readStructure);
        final OutputMapping outputMapping = new OutputMapping(rs);
        final IlluminaFileUtil util = new IlluminaFileUtil(dir.getParentFile(), LANE);
        final PerTilePerCycleFileUtil bclFileUtil = (PerTilePerCycleFileUtil) util.getUtil(IlluminaFileUtil.SupportedIlluminaFormat.Bcl);
        final MultiTileBclFileUtil multiTileBclFileUtil = (MultiTileBclFileUtil) util.getUtil(IlluminaFileUtil.SupportedIlluminaFormat.MultiTileBcl);


        final List<Integer> tileIntegers = new ArrayList<Integer>();
        for (final int tile : tiles) {
            tileIntegers.add(tile);
        }

        final BclParser bclParser;
        if(multiTile){
            final File bci = new File(MULTI_TILE_DATA_DIR, "s_" + LANE + ".bci");
            bclParser = new MultiTileBclParser(dir, LANE, multiTileBclFileUtil.getFiles(tileIntegers, outputMapping.getOutputCycles()),
                    outputMapping, true, new BclQualityEvaluationStrategy(BclQualityEvaluationStrategy.ILLUMINA_ALLEGED_MINIMUM_QUALITY),
                    new TileIndex(bci));
        }
        else{
            bclParser = new BclParser(dir, LANE, bclFileUtil.getFiles(tileIntegers, outputMapping.getOutputCycles()),
                    outputMapping, new BclQualityEvaluationStrategy(BclQualityEvaluationStrategy.ILLUMINA_ALLEGED_MINIMUM_QUALITY));
        }


        final Map<Integer, ClusterData> testData = BinTdUtil.clusterData(LANE, Arrays.asList(boxArr(tiles)), readStructure, DATA_TYPES);

        int count = 0;
        int readNum = 0;
        while (bclParser.hasNext()) {
            final BclData bclData = bclParser.next();
            if (testData.containsKey(readNum)) {
                compareClusterToBclData(testData.get(readNum), bclData, readNum, count);
            }

            if (count == seekAfter) {
                bclParser.seekToTile(tiles[newTileIndex]);
                readNum = (orderedTileIndex * TILE_SIZES);
            } else {
                readNum++;
            }
            count++;
        }
        Assert.assertEquals(count, size);
        bclParser.close();
    }

    public static void deleteBclFiles(final File laneDirectory, final String readStructure) {
        final ReadStructure rs = new ReadStructure(readStructure);
        int index = 1;
        for (final ReadDescriptor rd : rs.descriptors) {
            if (rd.type == ReadType.S) {
                for (int i = index; i < index + rd.length; i++) {
                    final File cycleDir = new File(laneDirectory, "C" + i + ".1");
                    final File[] cycleFiles = cycleDir.listFiles();
                    for (final File toDelete : cycleFiles) {
                        if (!toDelete.delete()) {
                            throw new RuntimeException("Couldn't delete file " + toDelete.getAbsolutePath());
                        }
                    }
                }
            }

            index += rd.length;
        }
    }

    @Test(dataProvider = "tileMaps")
    public void fullBclParserTest(final int[] tiles, final int size, final int seekAfter, final int newTileIndex, final int orderedTileIndex) {
        fullBclParserTestImpl(TEST_DATA_DIR, READ_STRUCTURE, tiles, size, seekAfter, newTileIndex, orderedTileIndex, false);
    }

    //@Test(dataProvider = "multiTileMaps")
    public void fullMTBclParserTest(final int[] tiles, final int size, final int seekAfter, final int newTileIndex, final int orderedTileIndex) {
        fullBclParserTestImpl(MULTI_TILE_DATA_DIR, READ_STRUCTURE, tiles, size, seekAfter, newTileIndex, orderedTileIndex, true);
    }


    @Test(dataProvider = "tileMaps")
    public void fullBclParserTestWSkips(final int[] tiles, final int size, final int seekAfter, final int newTileIndex, final int orderedTileIndex) {
        fullBclParserTestImpl(TEST_DATA_DIR, READ_STRUCTURE_WSKIPS, tiles, size, seekAfter, newTileIndex, orderedTileIndex, false);
    }

    @Test(dataProvider = "tileMaps")
    public void fullBclParserTestWDeletedSkips(final int[] tiles, final int size, final int seekAfter, final int newTileIndex, final int orderedTileIndex) {
        fullBclParserTestWDeletedSkipsImpl(tiles, size, seekAfter, newTileIndex, orderedTileIndex, READ_STRUCTURE_WSKIPS);
    }

    @Test(dataProvider = "tileMaps")
    public void fullBclParserTestWPartiallyDeletedSkips(final int[] tiles, final int size, final int seekAfter, final int newTileIndex, final int orderedTileIndex) {
        fullBclParserTestWDeletedSkipsImpl(tiles, size, seekAfter, newTileIndex, orderedTileIndex, READ_STRUCTURE_WSKIPS_PARTIAL);
    }

    @Test(dataProvider = "tileMaps", expectedExceptions = RuntimeException.class)
    public void fullBclParserTestWBadDeletedSkips(final int[] tiles, final int size, final int seekAfter, final int newTileIndex, final int orderedTileIndex) {
        fullBclParserTestWDeletedSkipsImpl(tiles, size, seekAfter, newTileIndex, orderedTileIndex, READ_STRUCTURE_WSKIPS_BAD);
    }

    public void fullBclParserTestWDeletedSkipsImpl(final int[] tiles, final int size, final int seekAfter, final int newTileIndex, final int orderedTileIndex, final String readStructure) {
        final File basecallDir = IOUtil.createTempDir("bclParserTest", "BaseCalls");

        Exception exc = null;
        try {
            final File l001 = new File(basecallDir, "L001");
            if (!l001.mkdir()) {
                throw new RuntimeException("Couldn't make lane dir " + l001.getAbsolutePath());
            }

            copyBcls(TEST_DATA_DIR, l001);
            deleteBclFiles(l001, readStructure);
            fullBclParserTestImpl(l001, READ_STRUCTURE_WSKIPS, tiles, size, seekAfter, newTileIndex, orderedTileIndex, false);
        } catch (final Exception thrExc) {
            exc = thrExc;
        } finally {
            IOUtil.deleteDirectoryTree(basecallDir);
        }
        if (exc != null) {
            if (exc.getClass() == PicardException.class) {
                throw new PicardException(exc.getMessage());
            }
            throw new RuntimeException(exc);
        }
    }

    //Custom copy function to avoid copying .svn files etc...
    public static void copyBcls(final File srcLaneDir, final File dstDir) {
        final File[] listFiles = srcLaneDir.listFiles();

        for (final File dir : listFiles) {
            if (dir.isDirectory()) {
                File cycleDir = null;

                for (final File file : dir.listFiles()) {
                    if (file.getName().endsWith(".bcl")) {
                        if (cycleDir == null) {
                            cycleDir = new File(dstDir, dir.getName());
                            if (!cycleDir.mkdir()) {
                                throw new RuntimeException("Couldn't make directory (" + cycleDir.getAbsolutePath() + ")");
                            }
                        }

                        IOUtil.copyFile(file, new File(cycleDir, file.getName()));
                    }
                }
            }
        }
    }


    //Helper byte [] tuple for EAMSS testing
    class BasesAndQuals {
        public final byte[] bases;
        public final byte[] quals;
        public final byte[] maskedQuals;

        public BasesAndQuals(final byte[] bases, final byte[] quals, final Integer maskStart) {
            this.bases = bases;
            this.quals = quals;
            this.maskedQuals = qualsMaskedFrom(maskStart);
        }

        private byte[] qualsMaskedFrom(final Integer maskStart) {
            final byte[] maskedQuals = Arrays.copyOf(quals, quals.length);
            if (maskStart != null) {
                for (int i = maskStart; i < maskedQuals.length; i++) {
                    maskedQuals[i] = BclParser.MASKING_QUALITY;
                }
            }
            return maskedQuals;
        }

        public String toString() {
            return "BasesAndQuals( " + basesToString() + ", " + qualsToString(quals) + ", " + qualsToString(maskedQuals) + ")";
        }

        public String basesToString() {
            final StringBuilder sb = new StringBuilder(bases.length);
            for (final byte base : bases) {
                switch (base) {
                    case A:
                        sb.append("A ");
                        break;
                    case C:
                        sb.append("C ");
                        break;
                    case G:
                        sb.append("G ");
                        break;
                    case T:
                        sb.append("T ");
                        break;
                    case P:
                        sb.append(". ");
                        break;

                    default:
                        throw new RuntimeException("THIS SHOULD NOT HAPPEN!  Bad byte in bases!");
                }
            }

            return sb.toString();
        }

        public String qualsToString(final byte[] qualsToConvert) {

            final StringBuilder sb = new StringBuilder(bases.length);
            for (final byte qual : qualsToConvert) {
                sb.append(String.valueOf((int) qual));
                sb.append(",");
            }

            return sb.toString();
        }
    }

    @DataProvider(name = "eamssDataNo10GSeries")
    public Object[][] eamssDataNo10GSeries() {

        return new Object[][]{
                //Non-masking cases

                //tally very negative, 9G's
                {new BasesAndQuals(new byte[]{G, G, G, G, G, G, G, G, G},
                        new byte[]{13, 7, 35, 32, 31, 33, 31, 26, 29},
                        null)},

                //tally barely negative
                {new BasesAndQuals(new byte[]{G, G, G, G, G, G, G, G, G},
                        new byte[]{13, 7, 35, 26, 18, 19, 35, 8, 33},
                        null)},

                //Reaches 0, A stretch of more than 10 other types of bases
                {new BasesAndQuals(new byte[]{A, C, C, C, C, C, C, C, C, C, C, C, T, G, G, C, T, A, A},
                        new byte[]{7, 8, 33, 7, 2, 33, 16, 17, 19, 7, 6, 5, 35, 2, 33, 22, 18, 16, 25},
                        null)},

                //Stays at 0, Stretches of G's Separated
                {new BasesAndQuals(new byte[]{T, G, G, G, G, G, G, G, P, P, G, G, G, G, G, G, G, T, A, A, G, G, G},
                        new byte[]{7, 8, 33, 7, 2, 33, 16, 17, 2, 2, 6, 5, 35, 2, 33, 22, 18, 16, 25, 33, 32, 16, 18},
                        null)},

                //shorter
                {new BasesAndQuals(new byte[]{T, A, C},
                        new byte[]{25, 16, 16},
                        null)},

                //Longer
                {new BasesAndQuals(new byte[]{T, A, C, G, G, P, P, T, C, C, C, C, T, T, T, G, G, G, A, T, G, C, A, T, A, C, G, G, P, P, T, C, C, C, C, T, T, T, G, G, G, A, T, G, C, A},
                        new byte[]{25, 16, 16, 33, 22, 2, 2, 33, 35, 3, 31, 38, 22, 19, 25, 16, 16, 31, 30, 2, 2, 33, 26, 3, 31, 38, 22, 19, 2, 2, 30, 27, 28, 16, 2, 2, 30, 16, 19, 21, 22, 17, 19, 16, 16, 16},
                        null)},


                //Masking-Cases

                //tally very positive, 9Gs                                  X - Mask From here
                {new BasesAndQuals(new byte[]{G, G, G, G, G, G, G, G, G},
                        new byte[]{13, 7, 35, 32, 2, 16, 33, 14, 19},
                        7)},

                //tally barely negative                        X - Mask from here
                {new BasesAndQuals(new byte[]{G, G, G, G, G, G, G, G, G},
                        new byte[]{13, 7, 35, 33, 18, 2, 6, 8, 33},
                        4)},
                //Reaches 0, A stretch of more than 10 other types of bases                X - Mask From here
                {new BasesAndQuals(new byte[]{A, C, C, C, C, C, C, C, C, C, C, C, T, G, G, C, T, A, A},
                        new byte[]{7, 8, 33, 7, 2, 33, 16, 17, 19, 32, 33, 5, 8, 2, 2, 2, 33, 16, 25},
                        11)},
                //Stays at 0, Stretches of G's Separated                                                       X- Mask from here
                {new BasesAndQuals(new byte[]{T, G, G, G, G, G, G, G, P, P, G, G, G, G, G, G, G, T, A, A, G, G, G},
                        new byte[]{7, 8, 33, 7, 2, 33, 16, 17, 2, 2, 6, 5, 35, 2, 33, 30, 13, 16, 7, 2, 2, 16, 18},
                        16)},
                //shorter                       X - Mask from here
                {new BasesAndQuals(new byte[]{T, A, C},
                        new byte[]{2, 11, 13},
                        0)},
                //Longer                                                                                                                           X- Mask from here
                {new BasesAndQuals(new byte[]{T, A, C, G, G, P, P, T, C, C, C, C, T, T, T, G, G, G, A, T, G, C, A, T, A, C, G, G, P, P, T, C, C, C, C, T, T, T, G, G, G, A, T, G, C, A},
                        new byte[]{25, 16, 16, 33, 22, 2, 2, 33, 35, 3, 31, 38, 22, 19, 25, 16, 16, 31, 30, 2, 2, 33, 26, 3, 31, 38, 22, 19, 2, 2, 30, 27, 28, 16, 2, 2, 30, 16, 19, 21, 22, 2, 19, 16, 2, 2},
                        26)}
        };
    }

    /** For more information on EAMSS check BclParser and the large comment above runEamssForReadInPlace * */
    @DataProvider(name = "eamssDataWithGSeries")
    public Object[][] eamssTestDat() {
        return new Object[][]{

                //9 G's followed by tally max                     X - Mask from here
                {new BasesAndQuals(new byte[]{A, C, G, G, T, G, G, G, G, G, G, G, G, G, A, C, T},
                        new byte[]{7, 8, 33, 7, 12, 33, 16, 17, 2, 2, 32, 35, 35, 35, 2, 15, 9},
                        5)},
                //9 G's surpassed by tally max  X - Mask from here
                {new BasesAndQuals(new byte[]{A, C, G, G, T, G, G, G, G, G, G, G, G, G, A, C, T},
                        new byte[]{7, 8, 2, 7, 2, 2, 16, 17, 2, 2, 32, 35, 35, 35, 2, 15, 9},
                        0)},
                //10 G's ending before tally max                                                        X - Mask from here
                {new BasesAndQuals(new byte[]{A, C, G, G, T, G, G, G, G, G, G, G, G, G, A, C, T},
                        new byte[]{7, 8, 2, 7, 2, 2, 16, 17, 2, 2, 32, 35, 35, 35, 33, 15, 9},
                        15)},
                //10 G's ending on tally max                         X - Mask from here Is this wrong?
                {new BasesAndQuals(new byte[]{A, C, C, G, C, C, G, G, G, G, G, G, G, G, G, G, T, T, A},
                        new byte[]{33, 31, 29, 32, 28, 27, 30, 18, 18, 18, 18, 19, 19, 19, 33, 18, 9, 9, 19},
                        6)},
                //10 G's no masking
                {new BasesAndQuals(new byte[]{A, C, C, G, C, C, G, G, G, G, G, G, G, G, G, G, T, T, A},
                        new byte[]{33, 31, 29, 32, 28, 27, 30, 18, 18, 18, 18, 19, 19, 19, 33, 18, 33, 32, 34},
                        null)},
                //10 G' with an exception                               X - Mask from here
                {new BasesAndQuals(new byte[]{A, C, C, G, C, C, G, G, G, G, A, G, G, G, G, G, T, T, A},
                        new byte[]{33, 31, 29, 32, 28, 27, 30, 18, 18, 18, 18, 19, 19, 19, 33, 18, 9, 9, 19},
                        6)},
                //longer than 10 G's                      X - Mask from here
                {new BasesAndQuals(new byte[]{A, C, G, G, G, C, G, G, G, G, G, G, G, G, G, G, T, T, A},
                        new byte[]{33, 31, 29, 32, 28, 16, 33, 18, 18, 18, 18, 19, 19, 19, 33, 18, 3, 9, 19},
                        2)},
                //longer than 10 G's                                      X - Mask from here
                {new BasesAndQuals(new byte[]{A, C, G, G, C, C, G, G, G, G, G, G, G, G, G, G, G, G, G},
                        new byte[]{33, 31, 29, 32, 28, 16, 33, 18, 18, 18, 18, 19, 19, 19, 33, 34, 33, 33, 3},
                        6)},
                //longer than 10 G's
                {new BasesAndQuals(new byte[]{A, C, G, G, C, C, G, G, G, G, G, G, G, G, G, G, G, G, G, T, A, C, T, T, G, G, G, G, G, G, G, G, G, G, G, G, G},
                        new byte[]{33, 31, 29, 32, 28, 16, 33, 18, 18, 18, 18, 19, 19, 19, 33, 34, 33, 33, 3, 33, 34, 2, 4, 8, 33, 7, 35, 15, 16, 31, 30, 38, 16, 15, 22, 29, 25},
                        null)}
        };
    }

    public void testEamss(final BasesAndQuals bq) {
        final byte[] bases = Arrays.copyOf(bq.bases, bq.bases.length);
        final byte[] quals = Arrays.copyOf(bq.quals, bq.quals.length);

        BclParser.runEamssForReadInPlace(bases, quals);
        Assert.assertEquals(bases, bq.bases);
        Assert.assertEquals(quals, bq.maskedQuals);
    }

    @Test(dataProvider = "eamssDataNo10GSeries")
    public void eamssParsingTestNoGSeries(final BasesAndQuals bq) {
        testEamss(bq);
    }

    @Test(dataProvider = "eamssDataWithGSeries")
    public void eamssParsingTestWithGSeries(final BasesAndQuals bq) {
        testEamss(bq);
    }
}
TOP

Related Classes of picard.illumina.parser.BclParserTest

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.