Package edu.msu.cme.rdp.classifier.train

Examples of edu.msu.cme.rdp.classifier.train.LineageSequence


        }

        int i = 0;
        Iterator seqIt = seqList.iterator();
        while (seqIt.hasNext()) {
            LineageSequence pSeq = (LineageSequence) seqIt.next();

            GoodWordIterator wordIterator = getPartialSeqIteratorbyWindow(pSeq, window); // full sequence 

            if (wordIterator == null) {
                continue;
            }

            //for leave-one-out testing, we need to remove the word occurrences for
            //the current sequence. This is similar to hiding a sequence leaf.
            HierarchyTree curTree = genusNodeMap.get((String) pSeq.getAncestors().get(pSeq.getAncestors().size() - 1));


            curTree.hideSeq(wordIterator);
            List result = dm.getBestClasspath( wordIterator, genusNodeMap, false, min_bootstrap_words);
View Full Code Here


             }
         }       
       
        int i = 0;
        while (parser.hasNext()) {
            LineageSequence pSeq = parser.next();
            if (pSeq.getSeqString().length() == 0) {
                continue;
            }
            GoodWordIterator wordIterator = null;
            if (numGoodBases > 0) {
                wordIterator = pSeq.getPartialSeqIteratorbyGoodBases(numGoodBases)// test partial sequences with good words only

            } else {
                wordIterator = new GoodWordIterator(pSeq.getSeqString()); // full sequence 
            }

            if (wordIterator == null || wordIterator.getNumofWords() == 0) {
                //System.err.println(pSeq.getSeqName() + " unable to find good subsequence with length " + numGoodBases);
                continue;
            }

            //for leave-one-out testing, we need to remove the word occurrences for
            //the current sequence. This is similar to hiding a sequence leaf.
            HierarchyTree curTree = nodeMap.get((String) pSeq.getAncestors().get(pSeq.getAncestors().size() - 1));
            if ( !hideTaxon){
                if ( curTree.isSingleton()){
                    nodeMap.remove(curTree.getName());
                }else {
                    curTree.hideSeq(wordIterator);
View Full Code Here

        factory.buildTree();
        // get the lineage of each sequence in trainSeqFile
        LineageSequenceParser trainParser = new LineageSequenceParser(new File(trainSeqFile));
        HashMap<String, List<String>> lineageMap = new HashMap<String, List<String>>();
        while (trainParser.hasNext()) {
            LineageSequence seq = (LineageSequence) trainParser.next();
            lineageMap.put(seq.getSeqName(), seq.getAncestors());
           
         }
        trainParser.close();
        NuclSeqMatch sabCal = new NuclSeqMatch(trainSeqFile);
        LineageSequenceParser parser = new LineageSequenceParser(new File(testSeqFile));

        int count = 0;
        while (parser.hasNext()) {
            LineageSequence seq = (LineageSequence) parser.next();
            HashMap<String,HierarchyTree> queryAncestorNodes = getAncestorNodes(factory.getRoot(), seq.getSeqName(), seq.getAncestors());
           TreeSet<KmerMatchCore.BestMatch> matchResults = sabCal.findAllMatches(seq);
           
            short withinLowestRankSab = -1;
            short diffLowestRankSab = -1
            String bestDiffLowestRankMatch = null;
            for (KmerMatchCore.BestMatch match: matchResults){
                if ( match.getBestMatch().getSeqName().equals(seq.getSeqName())) continue;
                short sab = (short)(Math.round(100*match.getSab()));
                HashMap<String,HierarchyTree> matchAncestorNodes = getAncestorNodes(factory.getRoot(), match.getBestMatch().getSeqName(), lineageMap.get(match.getBestMatch().getSeqName()));
                boolean withinTaxon = false;
                for (int i = ranks.size() -1; i >=0; i--){                   
                    HierarchyTree queryTaxon = queryAncestorNodes.get( ranks.get(i));
View Full Code Here

        factory.buildTree();
        // load every training sequence into memory so each test sequence can be
        // compared against all of them
        LineageSequenceParser trainParser = new LineageSequenceParser(new File(trainSeqFile));
        ArrayList<LineageSequence> trainSeqList = new ArrayList<LineageSequence>();
        while (trainParser.hasNext()) {
            LineageSequence seq = (LineageSequence) trainParser.next();
            trainSeqList.add(seq);
         }
        trainParser.close();
        LineageSequenceParser parser = new LineageSequenceParser(new File(testSeqFile));

        // for each test sequence, align it against every training sequence and count
        // the resulting score under the rank the two lineages share
        while (parser.hasNext()) {
            LineageSequence seq = (LineageSequence) parser.next();
            HashMap<String,HierarchyTree> queryAncestorNodes = getAncestorNodes(factory.getRoot(), seq.getSeqName(), seq.getAncestors());
           
            for (LineageSequence trainSeq: trainSeqList){
                // never compare a sequence with itself (matched by sequence name)
                if ( trainSeq.getSeqName().equals(seq.getSeqName())) continue;
               
                HashMap<String,HierarchyTree> matchAncestorNodes = getAncestorNodes(factory.getRoot(), trainSeq.getSeqName(), trainSeq.getAncestors());
                boolean withinTaxon = false;
                String lowestCommonRank = null;
                // walk the ranks from the last entry of `ranks` to the first; ranks where
                // either lineage has no node are ignored.
                // NOTE(review): presumably `ranks` is ordered from highest to lowest rank,
                // so this walks from the lowest rank upwards — confirm.
                for (int i = ranks.size() -1; i >=0; i--){                   
                    HierarchyTree queryTaxon = queryAncestorNodes.get( ranks.get(i));
                    HierarchyTree matchTaxon = matchAncestorNodes.get( ranks.get(i));
                    if ( queryTaxon != null && matchTaxon != null){
                        if ( queryTaxon.getName().equals(matchTaxon.getName())){
                            if ( !withinTaxon){  // if the query and match are not in the same child taxon, add sab to the current taxon
                                lowestCommonRank = ranks.get(i);
                                //(sabCoutMap.get(ranks.get(i)))[sab]++;
                            }
                            withinTaxon = true;                           
                        }else {
                            // a mismatch resets the flag, so a later matching rank
                            // overwrites lowestCommonRank
                            withinTaxon = false;
                        }
                    }
                } 
               
                if ( lowestCommonRank == null){  // the two lineages share no taxon at any rank; skip this pair
                    continue;
                }

                // we need to use overlap_trim mode and calculate distance as metric to count insertions, deletions and mismatches.
                PairwiseAlignment result = PairwiseAligner.align(seq.getSeqString().replaceAll("U", "T"), trainSeq.getSeqString().replaceAll("U", "T"), scoringMatrix, mode);
                // score = 100 - 100*distance; the (short) cast drops the fractional part.
                // The score is then used directly as an index into the counter array
                // stored for lowestCommonRank.
                short sab = (short) (100 - 100*dist.getDistance(result.getAlignedSeqj().getBytes(), result.getAlignedSeqi().getBytes(), 0));
                sabCoutMap.get(lowestCommonRank)[sab]++;
                               
            }          
        }
View Full Code Here

        int totalTest = 0;
        int totalSeq = 0;
        LineageSequenceParser parser = new LineageSequenceParser(source_file);
        while ( parser.hasNext()){
            totalSeq ++;
          LineageSequence pSeq = parser.next();
          if ( !selectedTestSeqIDs.contains(pSeq.getSeqName()) || pSeq.getSeqString().length() == 0){
              continue;
          }
          GoodWordIterator wordIterator = null ;
          if ( partialLength != null ){
                wordIterator = pSeq.getPartialSeqIteratorbyGoodBases(partialLength.intValue())// test partial sequences with good words only

          }else {
                wordIterator = new GoodWordIterator(pSeq.getSeqString()); // full sequence
          }

          if (wordIterator == null || wordIterator.getNumofWords() == 0){
            //System.err.println(pSeq.getSeqName() + " unable to find good sequence");
            continue;
View Full Code Here

        // create the tree factory from the taxonomy file and open the sequence source
        TreeFactory factory = new TreeFactory(new FileReader(tax_file));
        LineageSequenceParser parser = new LineageSequenceParser(source_file);

        // add every sequence whose name is NOT in selectedTestSeqIDs to the factory;
        // the selected test sequences are left out
        while ( parser.hasNext() ){
            LineageSequence pSeq = parser.next();
            if ( !selectedTestSeqIDs.contains(pSeq.getSeqName())){
              factory.addSequence( pSeq);
            }
        }
        parser.close();
View Full Code Here

        // need to use ISO encoding for UNITE
        // NOTE(review): the FileReader below is constructed without an explicit charset,
        // so it does not select ISO encoding itself — confirm this matches the
        // requirement stated above.
        FileReader tax  = new FileReader(new File(taxFile));
        TreeFactory factory = new TreeFactory(tax);
              
        LineageSequenceParser parser = new LineageSequenceParser(new File(seqFile));
        LineageSequence seq;
        HashMap<String, String> seqMap = new HashMap<String, String>(); // seqID, desc
        // add every sequence to the factory and record its description under a
        // seqID derived from the sequence name
        while ( parser.hasNext()){
            seq = parser.next();
            factory.addSequence(seq, false); // do not check the kmers
           
            if ( seq.getSeqName().contains("|S00") ){ // rdpID: key is the part before the first '|'
                String[] values = seq.getSeqName().split("\\|");
                seqMap.put(values[0], seq.getDesc());  
            }else if (seq.getSeqName().contains("|SH") ){  // seq from UNITE: key is the second '|'-separated field
                String[] values = seq.getSeqName().split("\\|");
                seqMap.put(values[1], seq.getDesc());
            }else {
                // any other name is used unchanged as the key
                seqMap.put(seq.getSeqName(), seq.getDesc());
            }
           
        }
        parser.close();
        HierarchyTreeExtend retVal = new HierarchyTreeExtend(factory.getRoot(), trainsetName);
View Full Code Here

        this.show_alignment = show_alignment;
        factory = new TreeFactory(new FileReader(taxFile));
        LineageSequenceParser parser = new LineageSequenceParser(new File(trainseqFile));
       
        while (parser.hasNext()) {
            LineageSequence tmp = (LineageSequence) parser.next();
            seqList.add(tmp);
            factory.addSequence((LineageSequence) parser.next());
        }
        parser.close();
       
View Full Code Here

        HashMap<String, HierarchyTree> nodeMap = new HashMap<String, HierarchyTree>();
        HierarchyTree root = factory.getRoot();
        root.getNodeMap(factory.getLowestRank(), nodeMap);
       
        for ( int i= 0; i < seqList.size(); i++){
            LineageSequence seqx = seqList.get(i);
            HierarchyTree treex = nodeMap.get((String) seqx.getAncestors().get(seqx.getAncestors().size() - 1));
            for ( int j = i+1; j < seqList.size(); j++){
                LineageSequence seqy = seqList.get(j);
                HierarchyTree treey = nodeMap.get((String) seqy.getAncestors().get(seqy.getAncestors().size() - 1));
               
                Taxonomy lowestCommonAnc = findLowestCommonAncestor(treex, treey);              
                PairwiseAlignment result = PairwiseAligner.align(seqx.getSeqString().replaceAll("U", "T"), seqy.getSeqString().replaceAll("U", "T"), scoringMatrix, mode);
                double distance = dist.getDistance(result.getAlignedSeqj().getBytes(), result.getAlignedSeqi().getBytes(), 0);

                if ( show_alignment){
                    System.out.println(">\t" + seqx.getSeqName() + "\t" + seqy.getSeqName() + "\t" + String.format("%.3f", distance) + "\t" + lowestCommonAnc.getHierLevel());
                    System.out.println(result.getAlignedSeqi() + "\n");
                    System.out.println(result.getAlignedSeqj() + "\n");
                }
       
                ArrayList<Double> distList = distanceMap.get(lowestCommonAnc);
View Full Code Here

        // create the tree factory from the taxonomy file and open the sequence source
        TreeFactory factory = new TreeFactory(new FileReader(tax_file));
        LineageSequenceParser parser = new LineageSequenceParser(source_file);
        // NOTE(review): the value type is the raw HashSet, while the sets stored below
        // are HashSet<String> — consider parameterizing if no caller needs the raw type.
        HashMap<String, HashSet> genusTrainSeqMap = new HashMap<String, HashSet>(); // keep the seqID for each genus

        // add every sequence to the tree and group the sequence names by the name of
        // the node that addSequence returns
        while ( parser.hasNext() ){
            LineageSequence pSeq = parser.next();
            HierarchyTree genusNode = factory.addSequence( pSeq);
            HashSet<String> genusSeqSet = genusTrainSeqMap.get(genusNode.getName());
            if ( genusSeqSet == null){
                // first sequence seen for this node: create its set
                genusSeqSet = new HashSet<String>();
                genusTrainSeqMap.put(genusNode.getName(), genusSeqSet);
            }
            genusSeqSet.add(pSeq.getSeqName());
        }
        parser.close();


        // randomly select nodes at the given rank level
View Full Code Here

TOP

Related Classes of edu.msu.cme.rdp.classifier.train.LineageSequence

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.