Package au.org.intersect.samifier.generator

Source Code of au.org.intersect.samifier.generator.GlimmerFileLocationGenerator

package au.org.intersect.samifier.generator;

import au.org.intersect.samifier.domain.GenomeConstant;
import au.org.intersect.samifier.domain.ProteinLocation;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;

import java.io.File;
import java.io.IOException;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class GlimmerFileLocationGenerator implements LocationGenerator {

    private String glimmerFilePath;
    private String chromosome;

    public GlimmerFileLocationGenerator(String glimmerFilePath) {
        this.glimmerFilePath = glimmerFilePath;
    }

    @Override
    public List<ProteinLocation> generateLocations()
            throws LocationGeneratorException {
        File glimmerFile = new File(glimmerFilePath);
        List<ProteinLocation> locations = null;
        try {
            locations = parseGlimmerFile(glimmerFile);
        } catch (IOException e) {
            throw new LocationGeneratorException("Error with file "
                    + glimmerFilePath, e);
        }

        Collections.sort(locations);
        return locations;
    }

    public List<ProteinLocation> parseGlimmerFile(File glimmerFile)
            throws LocationGeneratorException, IOException {
        int lineCount = 0;
        List<ProteinLocation> proteinLocations = new ArrayList<ProteinLocation>();
        boolean wasHeader = false;
        for (String line : FileUtils.readLines(glimmerFile)) {
            lineCount++;
            if (line.startsWith(">")) {
                wasHeader = true;
                parseHeader(line);
                continue;
            }
            if (!wasHeader) continue;
            String[] columns = line.split("\\s+");
            if (columns.length < 5) {
                throw new LocationGeneratorException(
                        "Expecting 5 columns at line: " + lineCount);
            }

            String name = columns[0];
            int firstIndex = Integer.parseInt(columns[1]);
            int secondIndex = Integer.parseInt(columns[2]);
            String direction = columns[3].substring(0, 1);
            String frame = columns[3].substring(1);

            BigDecimal confidenceScore = new BigDecimal(columns[4]);

            if (direction.equals(GenomeConstant.FORWARD_FLAG)) {
                if (firstIndex > secondIndex) {
                    throw new LocationGeneratorException("Error in line : " + line);
                }
                proteinLocations.add(new ProteinLocation(name, firstIndex, secondIndex - firstIndex + 1, direction, frame, confidenceScore, null, chromosome));
            } else if (direction.startsWith(GenomeConstant.REVERSE_FLAG)) {
                if (secondIndex > firstIndex) {
                    throw new LocationGeneratorException("Error in line : " + line);
                }
                proteinLocations.add(new ProteinLocation(name, secondIndex,
                        firstIndex - secondIndex + 1,
                        GenomeConstant.REVERSE_FLAG, frame, confidenceScore, null, chromosome));
            } else {
                throw new LocationGeneratorException(
                        "Unexpected value for direction (4th column) at line: "
                                + lineCount);
            }
        }
        return proteinLocations;
    }

    private void parseHeader(String line) {
        String[] parts = line.split("\\|");
        if (parts.length < 2) return;
        chromosome = parts[3];
    }

}
TOP

Related Classes of au.org.intersect.samifier.generator.GlimmerFileLocationGenerator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.