Examples of AsciiLineReader


Examples of htsjdk.tribble.readers.AsciiLineReader

    public void testSeek() throws Exception {
        String expectedLine = "chr22\t14000000\t15000000\trecombRate\t0.182272\t0.20444\t0.160104\t0\t0\t0\t0\t0\t0";
        File testFile = new File(TestUtils.DATA_DIR + "igv/recombRate.igv.txt");
        SeekableFileStream is = new SeekableFileStream(testFile);
        is.seek(149247);
        AsciiLineReader reader = new AsciiLineReader(is);
        String nextLine = reader.readLine();
        assertEquals(expectedLine, nextLine);
    }
View Full Code Here

Examples of htsjdk.tribble.readers.AsciiLineReader

        if (bufferedInputStream instanceof PositionalBufferedStream) {
            pbs = (PositionalBufferedStream) bufferedInputStream;
        } else {
            pbs = new PositionalBufferedStream(bufferedInputStream);
        }
        return new AsciiLineReaderIterator(new AsciiLineReader(pbs));
    }
View Full Code Here

Examples of htsjdk.tribble.readers.AsciiLineReader

    // tumreads  normreads  class  span  site1  site2  quality  score
    // 48      0      long_range  4195715  3'-UTR of ENST0000037493
    public List<FeatureTrack> loadTracks(ResourceLocator locator, Genome genome) {

        List<FeatureTrack> tracks = new ArrayList();
        AsciiLineReader reader = null;
        List<htsjdk.tribble.Feature> features = new ArrayList(5000);

        int parseColumn = -1;
        String nextLine = null;
        int rowCounter = 0;

        try {
            reader = ParsingUtils.openAsciiReader(locator);
            setColumns(reader.readLine());
            rowCounter++;
            while ((nextLine = reader.readLine()) != null) {

               String[] tokens = Globals.tabPattern.split(nextLine, -1);
               rowCounter++;

               int nTokens = tokens.length;
                if (nTokens > pos2Column) {
                    int index = Integer.parseInt(tokens[numColumn]);

                    String chr1 = genome.getChromosomeAlias(tokens[chr1Column]);
                    int pos1 = Integer.parseInt(tokens[pos1Column]);
                    String str1 = tokens[str1Column];
                    Strand strand1 = (str1.equals("0") || str1.equals("(+)")) ? Strand.POSITIVE : Strand.NEGATIVE;

                    String chr2 = genome.getChromosomeAlias(tokens[chr2Column]);
                    int pos2 = Integer.parseInt(tokens[pos2Column]);
                    String str2 = tokens[str2Column];
                    Strand strand2 = (str2.equals("0") || str2.equals("(+)")) ? Strand.POSITIVE : Strand.NEGATIVE;

                    DRangerFeature feature = new DRangerFeature(chr1, pos1, strand1, chr2, pos2, strand2);
                    feature.setIndex(index);

                    if (tumreadsColumn > 0) {
                        feature.setTumreads(toInt(tokens[tumreadsColumn]));
                    }
                    if (normreadsColumn > 0) {
                        feature.setNormreads(toInt(tokens[normreadsColumn]));
                    }
                    if (classColumn > 0) {
                        feature.setFeatureClass(tokens[classColumn]);
                    }
                    if (spanColumn > 0) {
                        feature.setSpan(toInt(tokens[spanColumn]));
                    }
                    if (site1Column > 0) {
                        feature.setSite1(tokens[site1Column]);
                    }
                    if (site2Column > 0) {
                        feature.setSite2(tokens[site2Column]);
                    }
                    if (qualityColumn > 0) {
                        feature.setQuality(toInt(tokens[qualityColumn]));
                    }
                    if (scoreColumn > 0) {
                        feature.setScore(toInt(tokens[scoreColumn]));
                    }

                    features.add(feature);
                }
            }
        } catch (NumberFormatException ne) {
            throw new ParserException("Column " + parseColumn + " must be a numeric value", rowCounter, nextLine);
        } catch (Exception e) {
            log.error("Error parsing dRanger file", e);
            if (nextLine != null && rowCounter != 0) {
                throw new ParserException(e.getMessage(), e, rowCounter, nextLine);
            } else {
                throw new RuntimeException(e);
            }

        } finally {
            if (reader != null) {
                reader.close();
            }
        }

        FeatureTrack track = new FeatureTrack(locator, new FeatureCollectionSource(features, genome));
        track.setName(locator.getTrackName());
View Full Code Here

Examples of htsjdk.tribble.readers.AsciiLineReader

     * @return
     */
    public static GisticTrack loadData(ResourceLocator locator) {

        // TODO -- handle remote resource
        AsciiLineReader reader = null;

        String nextLine = null;
        int rowCounter = 0;
        try {
            Genome genome = GenomeManager.getInstance().getCurrentGenome();

            reader = ParsingUtils.openAsciiReader(locator);

            // Parse header
            reader.readLine();
            rowCounter++;

            // Parse data
            // parameters to track chromosome breaks. Used in wholeGenome case.
            GisticTrack track = new GisticTrack(locator);
            track.setName(locator.getTrackName());
            List<GisticScore> scores = new ArrayList();

            nextLine = reader.readLine();
            rowCounter++;

            while ((nextLine != null) && (nextLine.length() > 0)) {
                String[] tokens = nextLine.split("\t");

                GisticScore.Type type = getType(tokens[0].trim());
                String chr = genome.getChromosomeAlias(tokens[1].trim());

                int start = -1;
                try {
                    start = Integer.parseInt(tokens[2]);
                }
                catch (NumberFormatException ne) {
                    throw new ParserException("Column 3 must be a numeric value.", rowCounter, nextLine);
                }

                int end = -1;
                try {
                    end = Integer.parseInt(tokens[3]);
                }
                catch (NumberFormatException ne) {
                    throw new ParserException("Column 4 must be a numeric value.", rowCounter, nextLine);
                }

                float qValue = 0;
                try {
                    qValue = Float.parseFloat(tokens[4]);
                } catch (NumberFormatException numberFormatException) {
                    if (tokens[4].toLowerCase().equals("inf")) {
                        qValue = Float.POSITIVE_INFINITY;
                    } else {
                        qValue = Float.NaN;
                    }
                }

                try {
                    float gScore = Float.parseFloat(tokens[5]);
                    scores.add(new GisticScore(chr, start, end, qValue, gScore, type));

                }
                catch (Exception ex) {
                    log.error("Skipping line: " + nextLine, ex);
                }

                nextLine = reader.readLine();
                rowCounter++;
            }

            if (scores.isEmpty()) {
                throw new RuntimeException("No gistic scores were found.");
            } else {
                track.setScores(scores);
                computeWholeGenome(track, genome);
                return track;
            }

        } catch (FileNotFoundException e) {
            log.error("File not found: " + locator.getPath());
            throw new RuntimeException(e);
        }
        catch (ParserException e) {
            throw new RuntimeException(e);
        }
        catch (Exception e) {
            log.error("Exception when loading: " + locator.getPath(), e);
            if (rowCounter != 0) {
                throw new ParserException(e.getMessage(), rowCounter, nextLine);
            } else {
                throw new RuntimeException(e);
            }
        } finally {
            if (reader != null) {
                reader.close();
            }
        }

    }
View Full Code Here

Examples of htsjdk.tribble.readers.AsciiLineReader

    public MUTCodec(String path, Genome genome) {
        super(Mutation.class);
        this.path = path;
        this.genome = genome;
        try {
            LineIterator reader = new LineIteratorImpl(new AsciiLineReader(ParsingUtils.openInputStream(path)));
            readActualHeader(reader);
        } catch (IOException e) {
            log.error(e.getMessage(), e);
        }
    }
View Full Code Here

Examples of htsjdk.tribble.readers.AsciiLineReader

     *          If the fasta file cannot be indexed, for instance
     *          because the lines are of an uneven length
     */
    public static void createIndexFile(String inputPath, String outputPath) throws DataLoadException, IOException {
        // Reads the file at inputPath line by line and, for each ">" header line, records one
        // index entry through writeLine(...) to outputPath once the contig's data lines have
        // been consumed.

        AsciiLineReader reader = null;
        BufferedWriter writer = null;

        try {
            log.info("Creating index file at " + outputPath);
            reader = new AsciiLineReader(ParsingUtils.openInputStream(inputPath));
            writer = new BufferedWriter(new FileWriter(outputPath));
            String line = null;
            String curContig = null;
            //Names of all contigs seen so far, used to reject duplicates
            Set<String> allContigs = new HashSet<String>();
            //Bases/bytes of the first data line of the current contig; -1 until that line is read
            int basesPerLine = -1, bytesPerLine = -1;
            //location: reader position just after the current header line
            //size: total bases accumulated for the current contig
            //lastPosition: reader position after the previously read line
            long location = 0, size = 0, lastPosition = 0;

            int basesThisLine, bytesThisLine;
            //Number of data lines in the current contig whose length differs from the first;
            //-1 means no data line has been read yet for this contig
            int numInconsistentLines = -1;
            boolean haveTasks = true;
            //Number of blank lines in the current contig.
            //-1 for not set
            int numBlanks = -1;
            int lastBlankLineNum = -1;
            int curLineNum = 0;


            //We loop through, generating a new FastaSequenceIndexEntry
            //every time we see a new header line, or when the file ends.
            while (haveTasks) {
                line = reader.readLine();
                curLineNum++;

                if (line == null || line.startsWith(">")) {
                    //The last line can have a different number of bases/bytes
                    //Note: the counter does not record where the odd line was, so a single
                    //inconsistent line anywhere in the contig is tolerated.
                    if (numInconsistentLines >= 2) {
                        throw new DataLoadException("Fasta file has uneven line lengths in contig " + curContig, inputPath);
                    }

                    //Done with old contig
                    if (curContig != null) {
                        writeLine(writer, curContig, size, location, basesPerLine, bytesPerLine);
                    }

                    //End of file: the break alone ends the loop
                    if (line == null) {
                        haveTasks = false;
                        break;
                    }

                    //Header line
                    //Contig name is the first whitespace-delimited token, minus the leading '>'
                    curContig = WHITE_SPACE.split(line)[0];
                    curContig = curContig.substring(1);
                    if(allContigs.contains(curContig)){
                        throw new DataLoadException("Contig '" + curContig + "' found multiple times in file.", inputPath);
                    }else{
                        allContigs.add(curContig);
                    }

                    //Should be starting position of next line
                    location = reader.getPosition();
                    //Reset per-contig statistics
                    size = 0;
                    basesPerLine = -1;
                    bytesPerLine = -1;
                    numInconsistentLines = -1;
                } else {
                    basesThisLine = line.length();
                    //Bytes consumed by this line: position now minus position after the previous line
                    bytesThisLine = (int) (reader.getPosition() - lastPosition);

                    //Calculate stats per line if first line, otherwise
                    //check for consistency
                    if (numInconsistentLines < 0) {
                        basesPerLine = basesThisLine;
                        bytesPerLine = bytesThisLine;
                        numInconsistentLines = 0;
                        numBlanks = 0;
                    } else {
                        //Blank lines (basesThisLine == 0) are not counted as inconsistent
                        if ((basesPerLine != basesThisLine || bytesPerLine != bytesThisLine) && basesThisLine > 0) {
                            numInconsistentLines++;
                        }
                    }

                    //Empty line. This is allowed if it's at the end of the contig.
                    if (basesThisLine == 0) {
                        numBlanks++;
                        lastBlankLineNum = curLineNum;
                    } else if (numBlanks >= 1) {
                        //A data line after a blank line within the same contig is an error
                        throw new DataLoadException(String.format("Blank line at line number %d, followed by data line at %d, in contig %s\nBlank lines are only allowed at the end of a contig", lastBlankLineNum, curLineNum, curContig), inputPath);
                    }

                    size += basesThisLine;
                }
                //Remember where this line ended so the next line's byte count can be computed
                lastPosition = reader.getPosition();
            }
        } finally {
            if (reader != null) reader.close();
            if (writer != null) writer.close();

        }

    }
View Full Code Here

Examples of htsjdk.tribble.readers.AsciiLineReader

     * @return
     * @throws IOException
     */
    public GWASData parseDescriptions(GWASData gData, String hitChr, long hitLocation, int searchStartRow) throws IOException {

        AsciiLineReader reader = null;
        String nextLine = null;

        boolean hitFound = false;
        int cacheSize = gData.getDescriptionCache().getMaxSize();
        int rowCounter = 0;

        try {
            reader = ParsingUtils.openAsciiReader(locator);

            // Parse columns line
            String headerLine = reader.readLine();
            if (!this.columns.parseHeader(headerLine))
                throw new ParserException("Error while parsing columns line.", 0, nextLine);

            gData.getDescriptionCache().setHeaderTokens(headerLine);


            // Number of lines to cache after the hit (tries to estimate that half of the cache will be filled with data from data before the query data point, and half with data after the query data point
            int cacheAfter = cacheSize / 2;
            int cacheCounter = 0;

            while (cacheCounter < cacheAfter && (nextLine = reader.readLine()) != null && (nextLine.trim().length() > 0)) {

                nextLine = nextLine.trim();
                rowCounter++;

                if (rowCounter >= searchStartRow) {

                    GWASEntry entry = parseLine(nextLine, rowCounter);
                    if(entry == null) continue;

                    // See if chr and nucleotide position match to our query data point
                    if (entry.chr.equals(hitChr) && entry.start == hitLocation) {
                        hitFound = true;
                    }

                    // Add descriptions to cache
                    if (hitFound) {
                        cacheCounter++;
                    }
                    gData.getDescriptionCache().add(entry.chr, entry.start, entry.p, nextLine);

                }
            }

        } catch (ParserException e) {
            throw e;
        } catch (Exception e){
            if (nextLine != null && rowCounter != 0) {
                throw new ParserException(e.getMessage(), e, rowCounter, nextLine);
            } else {
                throw new RuntimeException(e);
            }
        } finally{
            if(reader != null) reader.close();
        }

        return gData;

    }
View Full Code Here

Examples of htsjdk.tribble.readers.AsciiLineReader

    }

    public GWASData parse() throws IOException {

        AsciiLineReader reader = null;
        String nextLine = null;
        int rowCounter = 0;

        Set<String> chromos = new HashSet<String>();
        GWASEntry lastEntry = null;

        try {
            reader = ParsingUtils.openAsciiReader(locator);

            String headerLine = reader.readLine();
            if (!this.columns.parseHeader(headerLine))
                throw new ParserException("Error while parsing columns line.", 0, nextLine);

            GWASData gData = new GWASData();

            int indexCounter = 0;

            while ((nextLine = reader.readLine()) != null && (nextLine.trim().length() > 0)) {

                nextLine = nextLine.trim();
                rowCounter++;

                GWASEntry entry = parseLine(nextLine, rowCounter);
                if (entry == null) continue;

                gData.addLocation(entry.chr, entry.start);
                gData.addValue(entry.chr, entry.p);

                //Check that file is sorted
                if(lastEntry != null){
                    if(entry.chr.equals(lastEntry.chr)){
                        if(entry.start < lastEntry.start){
                            throw new ParserException("File is not sorted, found start position lower than previous", rowCounter);
                        }
                    }else{
                        if(chromos.contains(entry.chr)){
                            throw new ParserException("File is not sorted; chromosome repeated", rowCounter);
                        }
                        chromos.add(entry.chr);
                    }
                }

                indexCounter++;

                int indexSize = 10000;
                if (indexCounter == indexSize) {
                    gData.getFileIndex().add((int) reader.getPosition());
                    indexCounter = 0;
                }

                lastEntry = entry;
            }
            return gData;

        } catch (Exception e) {
            if (nextLine != null && rowCounter != 0) {
                throw new ParserException(e.getMessage(), e, rowCounter, nextLine);
            } else {
                throw new RuntimeException(e);
            }
        } finally {
            if(reader != null) reader.close();
        }
    }
View Full Code Here

Examples of htsjdk.tribble.readers.AsciiLineReader

        dataStartColumn = 2;
    }

    public Collection<RNAIDataSource> parse() {
        // Create a buffer for the string split utility.  We use  a custom utility as opposed
        AsciiLineReader reader = null;
        List dataSources = null;
        String nextLine = null;
        InputStream probeMappingStream = null;

        try {
            reader = ParsingUtils.openAsciiReader(dataFileLocator);

            String headerLine = null;

            // Skip header rows

            nextLine = reader.readLine();
            nextLine = reader.readLine();

            headerLine = reader.readLine();

            // Parse column headings
            int skip = 1;

            String[] tokens = Globals.tabPattern.split(headerLine, -1);
            int nTokens = tokens.length;

            String description = (nTokens > descriptionColumn)
                    ? new String(tokens[descriptionColumn]) : null;

            int nColumns = (nTokens - dataStartColumn) / skip;
            String[] columnHeadings = new String[nColumns];
            for (int i = 0; i < nColumns; i++) {
                String heading = tokens[dataStartColumn + i * skip].replace('\"', ' ').trim();
                columnHeadings[i] = heading;
            }


            Map<String, String[]> rnaiProbeMap = getProbeMap();

            HashMap<String, HashMap<String, Float>> sampleGeneScoreMap = new HashMap();
            while ((nextLine = reader.readLine()) != null) {
                tokens = Globals.tabPattern.split(nextLine, -1);
                nTokens = tokens.length;
                String probeId = new String(tokens[0]);
                float[] values = new float[nColumns];

                String[] identifiers = (String[]) rnaiProbeMap.get(probeId);
                String identifier = null;
                if (identifiers == null || identifiers.length == 0) {
                    log.info("Could not find mapping for: " + probeId);
                    continue;
                } else {
                    identifier = identifiers[0];
                }

                NamedFeature gene = FeatureDB.getFeature(identifier.toUpperCase());
                if (gene == null) {
                    log.debug("Unknown identifier: " + identifier);
                    continue;
                }

                for (int i = 0; i < nColumns; i++) {
                    try {
                        int dataIndex = dataStartColumn + i * skip;

                        // If we are out of value tokens, or the cell is blank, assign NAN to the cell.
                        if ((dataIndex >= nTokens) || (tokens[dataIndex].length() == 0)) {
                            values[i] = Float.NaN;
                        } else {
                            values[i] = Float.parseFloat(tokens[dataIndex]);
                        }

                        String sample = columnHeadings[i];
                        RNAIHairpinValue hairpin = new RNAIHairpinValue(probeId, values[i]);
                        RNAIHairpinCache.getInstance().addHairpinScore(sample, gene.getName(),
                                hairpin);

                        HashMap<String, Float> geneScoreMap = sampleGeneScoreMap.get(sample);

                        if (geneScoreMap == null) {
                            geneScoreMap = new HashMap();
                            sampleGeneScoreMap.put(sample, geneScoreMap);
                        }

                        Float geneScore = geneScoreMap.get(gene.getName());
                        if (geneScore == null) {
                            geneScore = values[i];
                            geneScoreMap.put(gene.getName(), geneScore);
                        } else {

                            geneScore = new Float(Math.min(values[i], geneScore.floatValue()));
                            geneScoreMap.put(gene.getName(), geneScore);
                        }
                    } catch (NumberFormatException numberFormatException) {

                        // This is an expected condition.  IGV uses NaN to
                        // indicate non numbers (missing data values)
                        values[i] = Float.NaN;
                    }
                }
            }

            dataSources = computeGeneScores(sampleGeneScoreMap);

        } catch (IOException ex) {
            log.error("Error parsing RNAi file", ex);
            throw new RuntimeException(ex);
        } finally {
            if (probeMappingStream != null) {
                try {
                    probeMappingStream.close();
                } catch (IOException e) {
                    log.error("Error closing probe mapping stream", e);
                }
            }
            if (reader != null) {
                reader.close();
            }
        }

        return dataSources;
    }
View Full Code Here

Examples of htsjdk.tribble.readers.AsciiLineReader

            URL url = new URL(RNAI_MAPPING_URL);

            InputStream probeMappingStream = null;
            try {
                probeMappingStream = new GZIPInputStream(HttpUtils.getInstance().openConnectionStream(url));
                AsciiLineReader br = new AsciiLineReader(probeMappingStream);

                ProbeToLocusMap.getInstance().loadMapping(br, rnaiProbeMap);
            } catch (Exception e) {
                throw new LoadResourceFromServerException(e.getMessage(), RNAI_MAPPING_URL, e.getClass().getSimpleName());
            } finally {
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.