Package htsjdk.tribble.readers

Examples of htsjdk.tribble.readers.AsciiLineReader


     * Method description
     */
    public void parse() {


        AsciiLineReader reader = null;

        try {
            log.debug("Loading data for: " + filename);
            reader = new AsciiLineReader(new FileInputStream(filename));

            // Parse comments
            parseAttributes(reader);

            // Parse header
            parseHeaderRow(reader);

            // Parse data
            String nextLine = reader.readLine();

            while ((nextLine = reader.readLine()) != null) {
                if (!nextLine.startsWith("#")) {

                    String[] tokens = Globals.tabPattern.split(nextLine, -1);
                    int nTokens = tokens.length;
                    if (nTokens > 11) {
                        try {
                            String batchId = new String(tokens[0].trim());

                            String hairpinName = new String(tokens[4].trim().toUpperCase());

                            float scoreMean = Float.NaN;
                            try {
                                scoreMean = Float.parseFloat(tokens[8]);

                            } catch (NumberFormatException numberFormatException) {

                                // Nothing to do -- expected condition
                            }

                            float scoreSTD = 0;
                            try {
                                scoreSTD = Float.parseFloat(tokens[9]);
                            } catch (NumberFormatException numberFormatException) {

                                // Nothing to do -- expected condition
                            }

                            String gene = new String(tokens[13].trim().toUpperCase());

                            RNAIHairpinValue score = new RNAIHairpinValue(hairpinName, scoreMean,
                                    scoreSTD);

                            // Make batch_conditon key
                            // Rules from Jessee -- ignore conditions starting with *.  None, standard,
                            // and blank are all equivalent.
                            String cond = tokens[2].trim();
                            if (!cond.startsWith("*")) {
                                if (cond.equals("None") || cond.equals("Standard")) {
                                    cond = "";
                                }

                                String batchCond = batchId + "_" + cond;

                                RNAIHairpinCache.getInstance().addHairpinScore(batchCond, gene,
                                        score);
                            }
                        } catch (Exception ex) {
                            log.error("Skipping line: " + nextLine, ex);
                        }
                    }
                }
            }

        } catch (FileNotFoundException e) {
            log.error("RNAI file not found: " + filename);
            throw new RuntimeException("File not found: " + filename);

        } catch (IOException e) {
            log.error(filename, e);
            throw new RuntimeException("Error parsing file " + filename, e);
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }
View Full Code Here


        if (birdsuite) {
            dataset.setTrackType(TrackType.CNV);
        }

        AsciiLineReader reader = null;
        String nextLine = null;
        int lineNumber = 0;
        try {
            reader = ParsingUtils.openAsciiReader(locator);

            // Parse comments, if any
            nextLine = reader.readLine();
            while (nextLine.startsWith("#") || (nextLine.trim().length() == 0)) {
                lineNumber++;
                if (nextLine.length() > 0) {
                    parseComment(nextLine, dataset);
                }
                nextLine = reader.readLine();
            }

            // Read column headings
            String[] headings = nextLine.split("\t");


            if (birdsuite) {
                //sample  sample_index  copy_number  chr  start  end  confidence
                sampleColumn = 0;
                dataColumn = 2;
                chrColumn = 3;
                startColumn = 4;
                endColumn = 5;

            } else {
                sampleColumn = 0;
                chrColumn = 1;
                startColumn = 2;
                endColumn = 3;
                dataColumn = headings.length - 1;
            }

            while ((nextLine = reader.readLine()) != null && (nextLine.trim().length() > 0)) {
                lineNumber++;

                String[] tokens = Globals.tabPattern.split(nextLine, -1);
                int nTokens = tokens.length;
                if (nTokens > 4) {
                    int start;
                    int end;
                    try {
                        start = ParsingUtils.parseInt(tokens[startColumn].trim());
                    } catch (NumberFormatException numberFormatException) {
                        throw new ParserException("Column " + (startColumn + 1) + " must contain a numeric value.",
                                lineNumber, nextLine);
                    }
                    try {
                        end = ParsingUtils.parseInt(tokens[endColumn].trim());
                    } catch (NumberFormatException numberFormatException) {
                        throw new ParserException("Column " + (endColumn + 1) + " must contain a numeric value.",
                                lineNumber, nextLine);
                    }

                    String chr = tokens[chrColumn].trim();
                    if (genome != null) {
                        chr = genome.getChromosomeAlias(chr);
                    }


                    String trackId = new String(tokens[sampleColumn].trim());

                    StringBuffer desc = null;
                    if (birdsuite) {
                        desc = new StringBuffer();
                        desc.append("<br>");
                        desc.append(headings[6]);
                        desc.append("=");
                        desc.append(tokens[6]);
                    } else {
                        if (tokens.length > 4) {
                            desc = new StringBuffer();
                            for (int i = 4; i < headings.length - 1; i++) {
                                desc.append("<br>");
                                desc.append(headings[i]);
                                desc.append(": ");
                                desc.append(tokens[i]);
                            }
                        }
                    }


                    try {
                        float value = Float.parseFloat(tokens[dataColumn]);
                        String description = desc == null ? null : desc.toString();
                        dataset.addSegment(trackId, chr, start, end, value, description);
                    } catch (NumberFormatException numberFormatException) {
                        // log.info("Skipping line: " + nextLine);
                    }
                }
            }

        } catch (DataLoadException pe) {
            throw pe;
        } catch (ParserException pe) {
            throw pe;
        } catch (Exception e) {
            if (nextLine != null && lineNumber != 0) {
                throw new ParserException(e.getMessage(), e, lineNumber, nextLine);
            } else {
                throw new RuntimeException(e);
            }
        } finally {
            if (reader != null) {
                reader.close();
            }
        }

        dataset.sortLists();
        return dataset;
View Full Code Here

     *
     * @param file
     * @return
     */
    public static boolean isWiggle(ResourceLocator file) {
        AsciiLineReader reader = null;
        try {
            reader = ParsingUtils.openAsciiReader(file);
            String nextLine = null;
            int lineNo = 0;
            while ((nextLine = reader.readLine()) != null && (nextLine.trim().length() > 0)) {
                if (nextLine.startsWith("track") && nextLine.contains("wiggle_0")) {
                    return true;
                }
                if (lineNo++ > 100) {
                    break;
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
        return false;
    }
View Full Code Here

        }
        return false;
    }

    private void parseHeader() {
        AsciiLineReader reader = null;

        // The DataConsumer interface takes an array of data per position, however wig
        // files contain a single data point.  Create an "array" once that can
        // be resused

        try {

            reader = ParsingUtils.openAsciiReader(resourceLocator);

            while ((nextLine = reader.readLine()) != null && (nextLine.trim().length() > 0)) {

                // Skip comment lines
                if (nextLine.startsWith("#") || nextLine.startsWith("data") || nextLine.startsWith(
                        "browser") || nextLine.trim().length() == 0) {
                    continue;
                }

                if (nextLine.startsWith("track")) {
                    trackLine = nextLine;

                } else {
                    return;
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }
View Full Code Here

     * @throws IOException
     */
    public static MAFIndex createIndex(String alignmentFile) throws IOException {

        MAFIndex index = new MAFIndex();
        AsciiLineReader reader = ParsingUtils.openAsciiReader(new ResourceLocator(alignmentFile));


        try {

            String lastChr = null;
            int intervalStart = 0;
            int intervalEnd = 0;
            int blockCount = 0;
            String line;
            long lastOffset = 0;
            boolean newBlock = false;

            Set<String> allSpecies = new HashSet<String>();
            List<String> blockSpecies = new ArrayList<String>();
            Map<String, RunningAverage> speciesRanks = new HashMap<String, RunningAverage>();

            while ((line = reader.readLine()) != null) {
                //Ignore all comment lines
                if (line.startsWith("#") || line.trim().length() == 0) {
                    continue;
                }

                if (line.startsWith("a ")) {
                    newBlock = true;
                    blockCount++;

                    // Merge species list, if any, from previous block and start new one
                    mergeSpecies(blockSpecies, allSpecies, speciesRanks);
                    blockSpecies.clear();

                } else if (line.startsWith("s ")) {

                    String[] tokens = Globals.whitespacePattern.split(line);

                    String src = tokens[1];
                    String species = src;
                    String chr = src;
                    if (src.contains(".")) {
                        String[] srcTokens = ParsingUtils.PERIOD_PATTERN.split(src);
                        species = srcTokens[0];
                        chr = srcTokens[1];
                    }
                    if (lastChr == null) lastChr = chr;

                    blockSpecies.add(species);

                    if (newBlock) {
                        // This will be the reference sequence line (its always first after the "a")
                        int start = Integer.parseInt(tokens[2]);
                        int end = Integer.parseInt(tokens[3]) + start;

                        if ((!chr.equals(lastChr) && blockCount > 0) ||
                                blockCount > blockSize) {

                            // Record previous interval and start a new one.
                            index.insertInterval(lastChr, intervalStart, intervalEnd, lastOffset);

                            blockCount = 1;
                            lastOffset = reader.getPosition();
                            intervalStart = start;
                        }

                        lastChr = chr;
                        intervalEnd = end;
                        newBlock = false;
                    }

                } else if (line.startsWith("i ")) {
                    //We do not handle information lines yet.
                    continue;
                } else if (line.startsWith("q ")) {
                    //We do not handle quality lines yet.
                    continue;
                }
            }


            if (blockCount > 0) {
                index.insertInterval(lastChr, intervalStart, intervalEnd, lastOffset);
            }

            // Merge species list, if any, from previous block and start new one
            mergeSpecies(blockSpecies, allSpecies, speciesRanks);
            index.setSpecies(sortSpecies(allSpecies, speciesRanks));

            return index;


        } finally {
            reader.close();

        }

    }
View Full Code Here

    /**
     * @return
     */
    public void parse() throws IOException {

        AsciiLineReader reader = null;
        try {

            lastPosition = 0;

            reader = ParsingUtils.openAsciiReader(resourceLocator);
            String nextLine = null;


            // Infer datatype from extension.  This can be overriden in the
            // comment section
            if (isCopyNumberFileExt(resourceLocator.getPath())) {
                setTrackType(TrackType.COPY_NUMBER);
            } else if (isLOHFileExt(resourceLocator.getPath())) {
                setTrackType(TrackType.LOH);
            }

            // Parse comments, if any
            nextLine = reader.readLine();
            while (nextLine.startsWith("#") || (nextLine.trim().length() == 0)) {
                if (nextLine.length() > 0) {
                    parseComment(nextLine);
                }
                nextLine = reader.readLine();
            }
            parseHeader(nextLine.trim().split("\t"));

            setTrackParameters();

            float[] dataArray = new float[getHeadings().length];


            while ((nextLine = reader.readLine()) != null && (nextLine.trim().length() > 0)) {
                String[] tokens = Globals.singleTabMultiSpacePattern.split(nextLine);
                int nTokens = tokens.length;
                if (nTokens == 0) {
                    continue;
                }

                try {

                    chr = (genome == null ? tokens[chrColumn] : genome.getChromosomeAlias(tokens[chrColumn]));
                    if (!chr.equals(lastChr)) {
                        newChromosome();
                    }
                    lastChr = chr;

                    int startPosition = ParsingUtils.parseInt(tokens[startColumn].trim());
                    if (startPosition < lastPosition) {
                        throw new UnsortedException("Error: unsorted file.  .cn files must be sorted by genomic position.");
                    }
                    lastPosition = startPosition;

                    int endPosition = hasEndLocations ? ParsingUtils.parseInt(tokens[endColumn].trim()) : startPosition + 1;

                    // TODO -- compare nTokens with expected number
                    for (int i = firstDataColumn; i < nTokens; i += skipColumns) {
                        int idx = (i - firstDataColumn) / skipColumns;
                        try {
                            dataArray[idx] = Float.parseFloat(tokens[i].trim());
                        } catch (NumberFormatException numberFormatException) {
                            dataArray[idx] = Float.NaN;
                        }
                    }

                    String probe = tokens[probeColumn];

                    getDataConsumer().addData(chr, startPosition, endPosition, dataArray, probe);

                } catch (NumberFormatException e) {
                    log.error("Error parsing number in: " + nextLine + "\n" + e.getMessage(), e);
                }

            }

            parsingComplete();

        } catch (Exception e) {
            log.error(e.getMessage(), e);
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }
View Full Code Here

        }
    }


    public void loadMapping(String urlString, Map<String, String[]> map) {
        AsciiLineReader bufReader = null;
        InputStream is = null;
        try {
            if (HttpUtils.isRemoteURL(urlString)) {
                URL url = new URL(urlString);
                is = HttpUtils.getInstance().openConnectionStream(url);
            } else {
                is = new FileInputStream(urlString);
            }
            if (urlString.endsWith("gz")) {
                is = new GZIPInputStream(is);
            }
            bufReader = new AsciiLineReader(is);
            loadMapping(bufReader, map);

        } catch (Exception e) {
            log.error("Error loading probe mapping", e);
View Full Code Here

    protected void parseFile(ResourceLocator locator) {

        initializeDataHolders();
        unsortedChromosomes = new HashSet();

        AsciiLineReader reader = null;
        String nextLine = null;
        int lineNumber = 0;
        float[] dataArray = null;

        try {
            reader = ParsingUtils.openAsciiReader(locator);

            if (type == Type.EXPR) {
                reader.readLine(); // Skip header line
            }

            int position = -1;

            while ((nextLine = reader.readLine()) != null) {
                lineNumber++;

                if (nextLine.startsWith("#") || nextLine.startsWith("data") || nextLine.startsWith("browser") || nextLine.trim().length() == 0) {
                    continue;
                    // Skip
                }


                if (nextLine.startsWith("track") && type != Type.CPG) {
                    type = Type.BED_GRAPH;
                    ParsingUtils.parseTrackLine(nextLine, dataset.getTrackProperties());
                    if (dataset.getTrackProperties().getBaseCoord() == TrackProperties.BaseCoord.ZERO) {
                        this.startBase = 0;
                    }

                } else if (nextLine.startsWith("fixedStep")) {
                    type = Type.FIXED;
                    parseStepLine(nextLine);
                    position = start;
                    if (start < lastPosition) {
                        unsortedChromosomes.add(chr);
                    }

                } else if (nextLine.startsWith("variableStep")) {
                    type = Type.VARIABLE;
                    parseStepLine(nextLine);
                    if (start < lastPosition) {
                        unsortedChromosomes.add(chr);
                    }

                } else {
                    // Must be data
                    String[] tokens = Globals.singleTabMultiSpacePattern.split(nextLine);
                    int nTokens = tokens.length;
                    if (nTokens == 0) {
                        continue;
                    }
                    try {
                        if (type.equals(Type.CPG)) {

                            if (nTokens > 3) {
                                chr = tokens[1].trim();
                                if (!chr.equals(lastChr)) {
                                    changedChromosome(dataset, lastChr);
                                }
                                lastChr = chr;

                                int endPosition = -1;
                                try {
                                    endPosition = Integer.parseInt(tokens[2].trim());
                                } catch (NumberFormatException numberFormatException) {
                                    log.error("Column 2 is not a number");

                                    throw new ParserException("Column 2 must be numeric." + " Found: " + tokens[1],
                                            lineNumber, nextLine);
                                }
                                int startPosition = endPosition - 1;

                                if (startPosition < lastPosition) {
                                    unsortedChromosomes.add(chr);
                                }
                                lastPosition = startPosition;

                                float value = Float.parseFloat(tokens[4].trim());
                                if (tokens[3].trim().equals("R")) {
                                    value = -value;
                                }

                                addData(chr, startPosition, endPosition, value);
                            }
                        } else if (type.equals(Type.BED_GRAPH) || type.equals(Type.EXPR)) {

                            if (nTokens > 3) {
                                chr = tokens[chrColumn].trim();
                                if (!chr.equals(lastChr)) {
                                    changedChromosome(dataset, lastChr);
                                    //If we are seeing this chromosome again with something
                                    //in-between, assume it's unsorted
                                    if(dataset.containsChromosome(chr)){
                                        unsortedChromosomes.add(chr);
                                    }

                                }
                                lastChr = chr;

                                int startPosition = -1;
                                try {
                                    startPosition = Integer.parseInt(tokens[startColumn].trim());
                                } catch (NumberFormatException numberFormatException) {
                                    log.error("Column " + (startColumn + 1) + "  is not a number");

                                    throw new ParserException("Column (startColumn + 1) must be numeric." + " Found: " +
                                            tokens[startColumn],
                                            lineNumber, nextLine);
                                }

                                if (startPosition < lastPosition) {
                                    unsortedChromosomes.add(chr);
                                }
                                lastPosition = startPosition;


                                int endPosition = -1;
                                try {
                                    endPosition = Integer.parseInt(tokens[endColumn].trim());
                                    int length = endPosition - startPosition;
                                    updateLongestFeature(length);
                                } catch (NumberFormatException numberFormatException) {
                                    log.error("Column " + (endColumn + 1) + " is not a number");

                                    throw new ParserException("Column " + (endColumn + 1) +
                                            " must be numeric." + " Found: " + tokens[endColumn],
                                            lineNumber, nextLine);
                                }

                                addData(chr, startPosition, endPosition, Float.parseFloat(tokens[dataColumn].trim()));
                            }
                        } else if (type.equals(Type.VARIABLE)) {
                            if (nTokens > 1) {

                                // Per UCSC specification variable and fixed step coordinates are "1" based.
                                // We need to subtract 1 to convert to the internal "zero" based coordinates.
                                int startPosition = Integer.parseInt(tokens[0]) - 1;
                                if (startPosition < lastPosition) {
                                    unsortedChromosomes.add(chr);
                                }
                                lastPosition = startPosition;

                                int endPosition = startPosition + windowSpan;
                                addData(chr, startPosition, endPosition, Float.parseFloat(tokens[1]));
                            }
                        } else {    // Fixed step -- sorting is checked when step line is parsed
                            if (position >= 0) {
                                if (dataArray == null) {
                                    dataArray = new float[nTokens];
                                }
                                for (int ii = 0; ii < dataArray.length; ii++) {
                                    dataArray[ii] = Float.parseFloat(tokens[ii].trim());
                                }
                                int endPosition = position + windowSpan;
                                addData(chr, position, endPosition, dataArray);
                            }
                            position += step;
                            lastPosition = position;
                        }

                    } catch (NumberFormatException e) {
                        log.error(e);
                        throw new ParserException(e.getMessage(), lineNumber, nextLine);
                    }


                }

            }

            // The last chromosome
            changedChromosome(dataset, lastChr);

        } catch (ParserException pe) {
            throw (pe);
        } catch (Exception e) {
            if (nextLine != null && lineNumber != 0) {
                throw new ParserException(e.getMessage(), e, lineNumber, nextLine);
            } else {
                throw new RuntimeException(e);
            }
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }
View Full Code Here

        return locators;
    }

    public static MultiFileWrapper parse(ResourceLocator rl) {

        AsciiLineReader reader = null;

        try {
            reader = ParsingUtils.openAsciiReader(rl);

            String name = reader.readLine();
            String serverURL = reader.readLine();

            Map<String, ResourceLocator> locators = new LinkedHashMap();

            String nextLine = null;
            while ((nextLine = reader.readLine()) != null) {
                String[] tokens = nextLine.split("\t");
                String chr = tokens[0];
                String file = tokens[1];
                ResourceLocator loc = new ResourceLocator(file);
                locators.put(chr, loc);
            }

            return new MultiFileWrapper(name, locators);

        }
        catch (IOException e) {
            e.printStackTrace();
            return null;
        }
        finally {
            if (reader != null) {
                reader.close();

            }
        }

    }
View Full Code Here

            SortableRecordCodec codec = new SortableRecordCodec();

            SortingCollection cltn = SortingCollection.newInstance(SortableRecord.class, codec, comparator, maxRecords, tmpDir);

            Parser parser = getParser();
            AsciiLineReader reader = new AsciiLineReader(fis);

            String firstDataRow = writeHeader(reader, writer);
            if (firstDataRow != null) {
                cltn.add(parser.createRecord(firstDataRow));
            }
View Full Code Here

TOP

Related Classes of htsjdk.tribble.readers.AsciiLineReader

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.