Package au.org.intersect.samifier.domain

Source Code of au.org.intersect.samifier.domain.Genome

package au.org.intersect.samifier.domain;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.log4j.Logger;

import au.org.intersect.samifier.parser.GenomeFileParsingException;

public class Genome {

    private Map<String, GeneInfo> genes;
    private static Logger LOG = Logger.getLogger(Genome.class);
    public Genome() {
        genes = new HashMap<String, GeneInfo>();
    }

    public void addGene(GeneInfo gene) {
        genes.put(gene.getId(), gene);
    }

    public boolean hasGene(String orderedLocusName) {
        return genes.containsKey(orderedLocusName);
    }

    public GeneInfo getGene(String orderedLocusName) {
        if (hasGene(orderedLocusName)) {
            return genes.get(orderedLocusName);
        } else if (hasVirtualProtein(orderedLocusName)) {
            return getGeneForVirtualProtein(orderedLocusName);
        }
        return null;
    }

    private boolean hasVirtualProtein(String orderedLocusName) {
        for (GeneInfo genInfo : genes.values()) {
            if (genInfo.hasVirtualProtein(orderedLocusName)) {
                return true;
            }
        }
        return false;
    }

    private GeneInfo getGeneForVirtualProtein(String orderedLocusName) {
        for (GeneInfo geneInfo : genes.values()) {
            if (geneInfo.hasVirtualProtein(orderedLocusName)) {
                if (verifySimpleGene(geneInfo)) {
                    VirtualProtein vp = geneInfo.getVirtualProtein(orderedLocusName);
                    GeneInfo newGene = new GeneInfo(geneInfo.getChromosome(), geneInfo.getId(), vp.getStartOffset() , vp.getEndOffset(), geneInfo.getDirection());
                    newGene.addLocation(new GeneSequence(geneInfo.getId(), true, vp.getStartOffset(), vp.getEndOffset() , geneInfo.getDirection()));
                    newGene.setFromVirtualProtein(true);
                    newGene.setComments(orderedLocusName + "(" + vp.getStartOffset() + "-" + vp.getEndOffset() + ")");
                    newGene.setOriginalGeneId(vp.getGeneId());
                    return newGene;
                }
            }
        }
        return null;
    }


    private boolean verifySimpleGene(GeneInfo geneInfo) {
        if (geneInfo.getLocations().size() == 1) {
            GeneSequence sequence = geneInfo.getLocations().get(0);
            if (geneInfo.getStart() == sequence.getStart() && geneInfo.getStop() == sequence.getStop()) {
                return true;
            }
        }
        return false;
    }

    public Set<Map.Entry<String, GeneInfo>> getGeneEntries() {
        return genes.entrySet();
    }

    public Collection<GeneInfo> getGenes() {
        return genes.values();
    }

    public Set<String> getLocusNames() {
        return genes.keySet();
    }

    public String toString() {
        StringBuffer out = new StringBuffer();
        for (String orderedLocusName : genes.keySet()) {
            GeneInfo geneInfo = genes.get(orderedLocusName);
            out.append(orderedLocusName);
            out.append(System.getProperty("line.separator"));
            out.append("\t");
            out.append(geneInfo);
            out.append(System.getProperty("line.separator"));
        }
        return out.toString();
    }

    public void verify() throws GenomeFileParsingException{
        for (GeneInfo genInfo : genes.values()) {
            //first we'll sort locations
            int start = genInfo.getStart();
            List <GeneSequence> newEntries = new ArrayList<GeneSequence>();
            for (GeneSequence sequence : genInfo.getLocations()) {
                if (sequence.getStart() < start) {
                    String errorMessage = "Gene " + genInfo.getId() + " in chromosome " +  genInfo.getChromosome() + " has overlaping part at position (" + sequence.getStart() + "," + start + "). Can not continue.";
                    LOG.error(errorMessage);
                    throw new GenomeFileParsingException(errorMessage);
                }
                if (sequence.getStart() != start) {
                    LOG.warn("Gene " + genInfo.getId() + " in chromosome " +  genInfo.getChromosome() + " has missing part at position (" + start + "," + (sequence.getStart() - 1) + "). Assuming non coding sequence.");
                    GeneSequence seq = new GeneSequence(sequence.getParentId(), false, start, sequence.getStart() - 1, sequence.getDirection());
                    newEntries.add(seq);
                }
                start = sequence.getStop() + 1;
            }
            for (GeneSequence location : newEntries) {
                genInfo.addLocation(location);
            }
        }
    }


}
TOP

Related Classes of au.org.intersect.samifier.domain.Genome

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.