Package edu.msu.cme.rdp.classifier.train.validation

Examples of edu.msu.cme.rdp.classifier.train.validation.HierarchyTree


        // for each sequence with a name and/or a true path
        List resultList = new ArrayList();

        DecisionMaker dm = new DecisionMaker(factory);
        HierarchyTree root = factory.getRoot();

        HashMap<String, HierarchyTree> genusNodeMap = new HashMap<String, HierarchyTree>();
        factory.getRoot().getNodeMap(testRank, genusNodeMap);

        if (genusNodeMap.isEmpty()) {
            throw new IllegalArgumentException("\nThere is no node in GENUS level!");
        }

        int i = 0;
        Iterator seqIt = seqList.iterator();
        while (seqIt.hasNext()) {
            LineageSequence pSeq = (LineageSequence) seqIt.next();

            GoodWordIterator wordIterator = getPartialSeqIteratorbyWindow(pSeq, window); // full sequence 

            if (wordIterator == null) {
                continue;
            }

            // For leave-one-out testing, we need to remove the word occurrence for
            // the current sequence. This is similar to hiding a sequence leaf.
            HierarchyTree curTree = genusNodeMap.get((String) pSeq.getAncestors().get(pSeq.getAncestors().size() - 1));


            curTree.hideSeq(wordIterator);
            List result = dm.getBestClasspath( wordIterator, genusNodeMap, false, min_bootstrap_words);

            ValidClassificationResultFacade resultFacade = new ValidClassificationResultFacade(pSeq, result);
            resultFacade.setLabeledNode(curTree);
            compareClassificationResult(resultFacade);

            resultList.add(resultFacade);
            i++;
            // recover the wordOccurrence of the genus node, unhide
            curTree.unhideSeq(wordIterator);

        }

        displayStat();
    }
View Full Code Here


    /** Compare the assigned path with the true path for the test getSequence(),
     * counts the number of correct classes and the number of getSequence()s for
     * each path level.
     */
    private void compareClassificationResult(ValidClassificationResultFacade resultFacade) {
        HierarchyTree trueParent = resultFacade.getLabeledNode();
        List hitList = resultFacade.getRankAssignment();

        // compare the true taxon and the hit taxon with same rank.
        while (trueParent != null) {

            if (!trueParent.isSingleton()) {
                ValidationClassificationResult hit = null;
                for (int i = 0; i < hitList.size(); i++) {
                    ValidationClassificationResult tmp = (ValidationClassificationResult) hitList.get(i);
                    if ((trueParent.getTaxonomy().getHierLevel()).equals(tmp.getBestClass().getTaxonomy().getHierLevel())) {
                        hit = tmp;
                        break;
                    }
                }

                if (trueParent.getTaxonomy().getHierLevel().equals(testRank)) {
                    trueParent.incNumTotalTestedseq();
                }
                boolean correct = false;


                if (hit != null && trueParent.getTaxonomy().getTaxID() == hit.getBestClass().getTaxonomy().getTaxID()) {
                    correct = true;
                } else {
                    if (trueParent.getTaxonomy().getHierLevel().equals(testRank)) {
                        trueParent.incMissCount();
                    }
                    resultFacade.setMissedRank(trueParent.getTaxonomy().getHierLevel());
                }
                if (hit != null) {
                    increaseCount(hit, trueParent.getTaxonomy().getHierLevel(), correct, num_hierLevel);
                }

            } else {
                // System.err.println(" singleton: " + trueParent.getName() + " " + trueParent.getTaxonomy().getHierLevel() );
            }

            trueParent = trueParent.getParent();

        }

    }
View Full Code Here

        LineageSequenceParser parser = new LineageSequenceParser(source_file);
        HashMap<String, HashSet> genusTrainSeqMap = new HashMap<String, HashSet>(); // keep the seqID for each genus

        while ( parser.hasNext() ){
            LineageSequence pSeq = parser.next();
            HierarchyTree genusNode = factory.addSequence( pSeq);
            HashSet<String> genusSeqSet = genusTrainSeqMap.get(genusNode.getName());
            if ( genusSeqSet == null){
                genusSeqSet = new HashSet<String>();
                genusTrainSeqMap.put(genusNode.getName(), genusSeqSet);
            }
            genusSeqSet.add(pSeq.getSeqName());
        }
        parser.close();


        // randomly select nodes at the given rank level
        ArrayList<HierarchyTree> nodeList = new ArrayList<HierarchyTree>();
        factory.getRoot().getNodeList(rank, nodeList);      

        Set<HierarchyTree> selectedNodes = new HashSet<HierarchyTree>();

        int testCount = (int) (((float) nodeList.size()) * fraction);
        while (selectedNodes.size() < testCount){
            int rdmIndex = (int) (Math.floor(Math.random()* (double)nodeList.size()));
            selectedNodes.add(nodeList.get(rdmIndex));
            //System.err.println("selected " + nodeList.get( rdmIndex).getName() + "\t" + nodeList.get(rdmIndex).getNumOfLeaves());
            nodeList.remove(rdmIndex);
        }

        // select the seqIDs
        Set<String> selectedSeqIDs = new HashSet<String>();

        for (HierarchyTree node : selectedNodes){
            ArrayList<HierarchyTree> tmp = new ArrayList<HierarchyTree>();
            node.getNodeList(Taxonomy.GENUS, tmp);
            for ( HierarchyTree genusNode: tmp){
                selectedSeqIDs.addAll(genusTrainSeqMap.get(genusNode.getName()));
            }
        }
        return selectedSeqIDs;

    }
View Full Code Here

        HashMap<String,HierarchyTree> ancestorNodes = new HashMap<String,HierarchyTree>();
        if ( !ancestors.get(0).equals(root.getName())){
            throw new IllegalArgumentException("Sequence " + seqName + " does not have the same root taxon" + root.getName());
        }
        ancestorNodes.put(root.getTaxonomy().getHierLevel(), root);
        HierarchyTree curParent = root;
        for (int i = 1; i < ancestors.size(); i++){
           
            HierarchyTree  node = curParent.getSubclassbyName(ancestors.get(i));
            if ( node == null){
                throw new IllegalArgumentException("Sequence " + seqName + " cannot find ancestor node: " + ancestors.get(i));
            }
            ancestorNodes.put(node.getTaxonomy().getHierLevel().toLowerCase(), node);
            curParent = node;
        }       
        return ancestorNodes;
    }
View Full Code Here

                if ( match.getBestMatch().getSeqName().equals(seq.getSeqName())) continue;
                short sab = (short)(Math.round(100*match.getSab()));
                HashMap<String,HierarchyTree> matchAncestorNodes = getAncestorNodes(factory.getRoot(), match.getBestMatch().getSeqName(), lineageMap.get(match.getBestMatch().getSeqName()));
                boolean withinTaxon = false;
                for (int i = ranks.size() -1; i >=0; i--){                   
                    HierarchyTree queryTaxon = queryAncestorNodes.get( ranks.get(i));
                    HierarchyTree matchTaxon = matchAncestorNodes.get( ranks.get(i));
                    if ( queryTaxon != null && matchTaxon != null){
                        if ( queryTaxon.getName().equals(matchTaxon.getName())){
                            if ( !withinTaxon){  // if the query and match are not in the same child taxon, add sab to the current taxon
                                (sabCoutMap.get(ranks.get(i)))[sab]++;
                            }
                            withinTaxon = true;                           
                        }else {
                            withinTaxon = false;
                        }
                    }
                   
                } 
               
                // find within or different lowest level rank sab score, be either species or genus or any rank
                HierarchyTree speciesQueryTaxon = queryAncestorNodes.get( ranks.get(ranks.size()-1));   
                HierarchyTree speciesMatchTaxon = matchAncestorNodes.get( ranks.get(ranks.size()-1));
               
                if ( speciesQueryTaxon != null && speciesMatchTaxon != null && speciesQueryTaxon.getName().equals(speciesMatchTaxon.getName())){
                    withinLowestRankSab = sab >= withinLowestRankSab ? sab: withinLowestRankSab;
                }else {
                   
                    if ( sab >= diffLowestRankSab ){
                        bestDiffLowestRankMatch = match.getBestMatch().getSeqName();
View Full Code Here

               
                HashMap<String,HierarchyTree> matchAncestorNodes = getAncestorNodes(factory.getRoot(), trainSeq.getSeqName(), trainSeq.getAncestors());
                boolean withinTaxon = false;
                String lowestCommonRank = null;
                for (int i = ranks.size() -1; i >=0; i--){                   
                    HierarchyTree queryTaxon = queryAncestorNodes.get( ranks.get(i));
                    HierarchyTree matchTaxon = matchAncestorNodes.get( ranks.get(i));
                    if ( queryTaxon != null && matchTaxon != null){
                        if ( queryTaxon.getName().equals(matchTaxon.getName())){
                            if ( !withinTaxon){  // if the query and match are not in the same child taxon, add sab to the current taxon
                                lowestCommonRank = ranks.get(i);
                                //(sabCoutMap.get(ranks.get(i)))[sab]++;
                            }
                            withinTaxon = true;                           
View Full Code Here

        }
        outStream.println();
        for ( int i = 0; i < ranks.size(); i++){
            outStream.print(ranks.get(i));
            for ( int t = 0; t < this.trainsets.size(); t++){                  
                HierarchyTree root = trainsets.get(t).getRoot();
                HashMap<String, HierarchyTree> nodeMap = new HashMap<String, HierarchyTree>();
                root.getNodeMap(ranks.get(i), nodeMap);
                outStream.print("\t" + nodeMap.size());
            }
            outStream.println();
        }
        outStream.print("All Seqs");
        for ( HierarchyTreeExtend factory: this.trainsets ){
            HierarchyTree root = factory.getRoot();
            outStream.print("\t" + root.getTotalSeqs());
        }
        outStream.println();
       
        // data for Venn Diagram, only produced when there are 2 or 3 training sets
        if ( this.trainsets.size() == 2 || this.trainsets.size() == 3){
            outStream.println("\n## data for Venn Diagram");
            for ( int i = 0; i < ranks.size(); i++){
                outStream.println("\n## Rank " + ranks.get(i));
                ArrayList<Set<String>> taxaList = new ArrayList<Set<String>>();
                for ( int t = 0; t < this.trainsets.size(); t++){                  
                    HierarchyTree root = trainsets.get(t).getRoot();
                    HashMap<String, HierarchyTree> nodeMap = new HashMap<String, HierarchyTree>();
                    root.getNodeMap(ranks.get(i), nodeMap);
                    taxaList.add(nodeMap.keySet());
                }
               
                Set<String> tempSet = new HashSet<String>();
                tempSet.addAll(taxaList.get(0));
View Full Code Here

        calDist();
    }
  
    private void calDist() throws OverlapCheckFailedException{
        HashMap<String, HierarchyTree> nodeMap = new HashMap<String, HierarchyTree>();
        HierarchyTree root = factory.getRoot();
        root.getNodeMap(factory.getLowestRank(), nodeMap);
       
        for ( int i= 0; i < seqList.size(); i++){
            LineageSequence seqx = seqList.get(i);
            HierarchyTree treex = nodeMap.get((String) seqx.getAncestors().get(seqx.getAncestors().size() - 1));
            for ( int j = i+1; j < seqList.size(); j++){
                LineageSequence seqy = seqList.get(j);
                HierarchyTree treey = nodeMap.get((String) seqy.getAncestors().get(seqy.getAncestors().size() - 1));
               
                Taxonomy lowestCommonAnc = findLowestCommonAncestor(treex, treey);              
                PairwiseAlignment result = PairwiseAligner.align(seqx.getSeqString().replaceAll("U", "T"), seqy.getSeqString().replaceAll("U", "T"), scoringMatrix, mode);
                double distance = dist.getDistance(result.getAlignedSeqj().getBytes(), result.getAlignedSeqi().getBytes(), 0);
View Full Code Here

    }
       
    private static Taxonomy findLowestCommonAncestor(HierarchyTree treex, HierarchyTree treey){
        ArrayList<HierarchyTree> ancestorx = new ArrayList<HierarchyTree>();
        ArrayList<HierarchyTree> ancestory = new ArrayList<HierarchyTree>();
        HierarchyTree parent = treex.getParent();
        ancestorx.add(treex);
        while(parent != null) {
            ancestorx.add(parent);
            parent = parent.getParent();
        }
        ancestory.add(treey);
        parent = treey.getParent();
        while(parent != null) {
            ancestory.add(parent);
            parent = parent.getParent();
        }
       
        Taxonomy lowestCommonAnc = ancestorx.get(ancestorx.size() -1).getTaxonomy();
        for ( int i = 2; i <= ancestorx.size() ; i++){
            if (  (ancestory.size() -i ) >= 0 ){
View Full Code Here

TOP

Related Classes of edu.msu.cme.rdp.classifier.train.validation.HierarchyTree

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.