Package org.broad.igv.tools.parsers

Source Code of org.broad.igv.tools.parsers.CNParser

/*
* Copyright (c) 2007-2013 The Broad Institute, Inc.
* SOFTWARE COPYRIGHT NOTICE
* This software and its documentation are the copyright of the Broad Institute, Inc. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. The Broad Institute is not responsible for its use, misuse, or functionality.
*
* This software is licensed under the terms of the GNU Lesser General Public License (LGPL),
* Version 2.1 which is available at http://www.opensource.org/licenses/lgpl-2.1.php.
*/

/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package org.broad.igv.tools.parsers;

import org.apache.log4j.Logger;
import org.broad.igv.Globals;
import org.broad.igv.feature.genome.Genome;
import org.broad.igv.track.TrackType;
import org.broad.igv.util.ParsingUtils;
import org.broad.igv.util.ResourceLocator;
import htsjdk.tribble.readers.AsciiLineReader;

import java.io.IOException;
import java.util.Set;

/**
* @author jrobinso
*/
public class CNParser extends AbstractParser {

    static private Logger log = Logger.getLogger(CNParser.class);

    private static int PROBE_COL = 0;
    int skipColumns;
    // State variables.  This is a serial type parser,  these variables are used to hold temporary
    // state.
    private String chr;
    String lastChr = "";
    int lastPosition = 0;
    private double minValue;
    private double maxValue;
    ResourceLocator resourceLocator;
    Set<String> unsortedChromosomes;
    private int chrColumn;
    private int startColumn;
    private int endColumn;
    private int probeColumn;
    private int firstDataColumn;
    private boolean hasEndLocations;
    private FileType type;
    Genome genome;
    private boolean hasCalls;

    enum FileType {
        IGV, XCN, SNP, CN
    }

    public CNParser(String file, DataConsumer dataConsumer, Genome genome) {
        this(new ResourceLocator(file), dataConsumer, genome);
    }

    /**
     *
     */
    public CNParser(ResourceLocator locator, DataConsumer dataConsumer, Genome genome) {
        super(dataConsumer);
        this.resourceLocator = locator;
        this.genome = genome;

        String tmp = locator.getPath().toLowerCase();
        tmp = tmp.endsWith(".txt") ? tmp.substring(0, tmp.length() - 4) : tmp;

        hasCalls = tmp.endsWith(".xcn") || tmp.endsWith(".snp");

        if (tmp.endsWith(".igv")) {
            chrColumn = 0;
            startColumn = 1;
            endColumn = 2;
            probeColumn = 3;
            firstDataColumn = 4;
            hasEndLocations = true;
            hasCalls = false;
            type = FileType.IGV;
            setTrackType(TrackType.OTHER);
        } else {
            probeColumn = 0;
            chrColumn = 1;
            startColumn = 2;
            endColumn = -1;
            firstDataColumn = 3;
            hasEndLocations = false;
            hasCalls = tmp.endsWith(".xcn") || tmp.endsWith(".snp");
            if (tmp.endsWith(".cn")) {
                type = FileType.CN;
            } else if (tmp.endsWith(".xcn")) {
                type = FileType.XCN;
            } else {
                type = FileType.SNP;
            }
        }
        skipColumns = hasCalls ? 2 : 1;

    }

    public void parseHeader(String[] tokens) {
        int nDataColumns = (tokens.length - firstDataColumn) / skipColumns;
        String[] headings = new String[nDataColumns];
        for (int i = firstDataColumn; i < tokens.length; i += skipColumns) {
            int idx = (i - firstDataColumn) / skipColumns;
            headings[idx] = tokens[i];
        }
        setHeadings(headings);
    }

    /**
     * @return
     */
    public void parse() throws IOException {

        AsciiLineReader reader = null;
        try {

            lastPosition = 0;

            reader = ParsingUtils.openAsciiReader(resourceLocator);
            String nextLine = null;


            // Infer datatype from extension.  This can be overriden in the
            // comment section
            if (isCopyNumberFileExt(resourceLocator.getPath())) {
                setTrackType(TrackType.COPY_NUMBER);
            } else if (isLOHFileExt(resourceLocator.getPath())) {
                setTrackType(TrackType.LOH);
            }

            // Parse comments, if any
            nextLine = reader.readLine();
            while (nextLine.startsWith("#") || (nextLine.trim().length() == 0)) {
                if (nextLine.length() > 0) {
                    parseComment(nextLine);
                }
                nextLine = reader.readLine();
            }
            parseHeader(nextLine.trim().split("\t"));

            setTrackParameters();

            float[] dataArray = new float[getHeadings().length];


            while ((nextLine = reader.readLine()) != null && (nextLine.trim().length() > 0)) {
                String[] tokens = Globals.singleTabMultiSpacePattern.split(nextLine);
                int nTokens = tokens.length;
                if (nTokens == 0) {
                    continue;
                }

                try {

                    chr = (genome == null ? tokens[chrColumn] : genome.getChromosomeAlias(tokens[chrColumn]));
                    if (!chr.equals(lastChr)) {
                        newChromosome();
                    }
                    lastChr = chr;

                    int startPosition = ParsingUtils.parseInt(tokens[startColumn].trim());
                    if (startPosition < lastPosition) {
                        throw new UnsortedException("Error: unsorted file.  .cn files must be sorted by genomic position.");
                    }
                    lastPosition = startPosition;

                    int endPosition = hasEndLocations ? ParsingUtils.parseInt(tokens[endColumn].trim()) : startPosition + 1;

                    // TODO -- compare nTokens with expected number
                    for (int i = firstDataColumn; i < nTokens; i += skipColumns) {
                        int idx = (i - firstDataColumn) / skipColumns;
                        try {
                            dataArray[idx] = Float.parseFloat(tokens[i].trim());
                        } catch (NumberFormatException numberFormatException) {
                            dataArray[idx] = Float.NaN;
                        }
                    }

                    String probe = tokens[probeColumn];

                    getDataConsumer().addData(chr, startPosition, endPosition, dataArray, probe);

                } catch (NumberFormatException e) {
                    log.error("Error parsing number in: " + nextLine + "\n" + e.getMessage(), e);
                }

            }

            parsingComplete();

        } catch (Exception e) {
            log.error(e.getMessage(), e);
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }

    /**
     * Method description
     *
     * @return
     */
    public double getMinValue() {
        return minValue;
    }

    /**
     * Method description
     *
     * @return
     */
    public double getMaxValue() {
        return maxValue;
    }

    private void newChromosome() {
        //getDataConsumer().newChromosome(chr);
        lastPosition = -1;
    }

    private void parsingComplete() {
        getDataConsumer().parsingComplete();
    }

    private boolean isCopyNumberFileExt(String filename) {
        String tmp = (filename.endsWith(".txt") || filename.endsWith(".tab") || filename.endsWith(".xls")
                ? filename.substring(0, filename.length() - 4) : filename);
        return tmp.endsWith(".cn") || tmp.endsWith(".xcn") || tmp.endsWith(".snp");
    }

    private boolean isLOHFileExt(String filename) {
        String tmp = (filename.endsWith(".txt") || filename.endsWith(".tab") || filename.endsWith(".xls")
                ? filename.substring(0, filename.length() - 4) : filename);
        return tmp.endsWith(".loh");
    }
}
TOP

Related Classes of org.broad.igv.tools.parsers.CNParser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.