Package edu.msu.cme.rdp.multicompare.taxon

Examples of edu.msu.cme.rdp.multicompare.taxon.MCTaxon


     * Input files are sequence files
     */
    public MultiClassifierResult multiCompare(List<MCSample> samples, float confidence, PrintWriter assign_out,
            ClassificationResultFormatter.FORMAT format, int min_bootstrap_words) throws IOException {
        HierarchyTree sampleTreeRoot  = classifierFactory.getRoot();
        ConcretRoot<MCTaxon> root = new ConcretRoot<MCTaxon>(new MCTaxon(sampleTreeRoot.getTaxid(), sampleTreeRoot.getName(), sampleTreeRoot.getRank()) );

        Classifier classifier = classifierFactory.createClassifier();
        List<String> badSequences = new ArrayList();
        Map<String, Long> seqCountMap = new HashMap();
        Map<String, String> seqClassificationMap = new HashMap(); // holds the classification results to replace the biom metadata
View Full Code Here


     * taxonFilter indicates which taxon to match for the detail assignment output
     */
    public MultiClassifierResult multiClassificationParser(List<MCSample> samples, float confidence, PrintWriter assign_out,
            ClassificationResultFormatter.FORMAT format, String printRank, HashSet<String> taxonFilter) throws IOException {
        HierarchyTree sampleTreeRoot  = classifierFactory.getRoot();
        ConcretRoot<MCTaxon> root = new ConcretRoot<MCTaxon>(new MCTaxon(sampleTreeRoot.getTaxid(), sampleTreeRoot.getName(), sampleTreeRoot.getRank()) );
        List<String> badSequences = new ArrayList();
        Map<String, Long> seqCountMap = new HashMap();

        for (MCSample sample : samples) {
            ClassificationParser parser = ((MCSampleResult) sample).getClassificationParser(classifierFactory);
View Full Code Here

        int taxid = assignment.getTaxid();
        if (unclassified) {
            taxid = Taxon.getUnclassifiedId(taxid);
        }

        MCTaxon ret = root.getChildTaxon(taxid);
        if (ret == null) {
            ret = new MCTaxon(assignment.getTaxid(), assignment.getName(), assignment.getRank(), unclassified);
            root.addChild(ret, parentId);

            Long val = seqCountMap.get(ret.getRank());
            if (val == null) {
                val = 0L;
            }
            seqCountMap.put(ret.getRank(), val + 1);
            ret.setLineage(lineage.toString() + ret.getName() + ";" + ret.getRank() + ";");
        }

        return ret;
    }
View Full Code Here

   
    private void processClassificationResult(ClassificationResult result, MCSample sample, ConcretRoot<MCTaxon> root, float conf, Map<String, Long> seqCountMap) {
        RankAssignment lastAssignment = null;
        RankAssignment twoAgo = null;
        StringBuffer lineage = new StringBuffer();       
        MCTaxon taxon = null;
        MCTaxon cntaxon = null;
        HashSet<MCTaxon> tempTaxonSet = new HashSet<MCTaxon>();
        int parentId = root.getRootTaxid();   
        int count = sample.getDupCount(result.getSequence().getSeqName());
        for (RankAssignment assignment : (List<RankAssignment>) result.getAssignments()) {
            boolean stop = false;
View Full Code Here

        out.println("EstimateS File prepared by RDP MultiCompare Library Comparision Tool");
        out.println(nodeCount + "\t" + samples.size());
    }

    public boolean visitNode(VisitInfo<MCTaxon> info) {
        MCTaxon taxon = info.getTaxon();
        if(!taxon.getRank().equals(rank))
            return true;

        StringBuffer sampleBuf = new StringBuffer();

        int seqCount = 0;
        for(MCSample sample : samples) {
            sampleBuf.append(taxon.getCount(sample) + "\t");
            seqCount += taxon.getCount(sample);
        }

        if(seqCount > 0)
            out.println(sampleBuf);
View Full Code Here

    }

    @Override
    public boolean visitNode(VisitInfo<MCTaxon> info) {
        StringBuffer sampleBuf = new StringBuffer();
        MCTaxon taxon = info.getTaxon();

        int seqCount = 0;
        for(MCSample sample : samples) {
            if ( !printCNcorrected ){
                double d = taxon.getCount(sample);
                if ( isDoubleInt(d)){ // we need to write out integers in case other third party tools expect an integer
                    sampleBuf.append("\t").append( (long) d );
                }else {
                    sampleBuf.append("\t").append(String.format(dformat, d) );
                }
            } else {
                sampleBuf.append("\t").append(String.format(dformat, taxon.getCopyCorrectedCount(sample)));
            }
            seqCount += taxon.getCount(sample);
        }

        if(seqCount > 0 || !ommitEmpty)
            out.println(taxon.getTaxid() +"\t" + taxon.getLineage() + "\t" + taxon.getName() + "\t" + taxon.getRank() + sampleBuf);

        return true;
    }
View Full Code Here

        //taxid  lineage  name  rank sample sample .....
        // the first line should be the root; we need to handle this specially because a previous bug printed out the root taxon differently
        values = oneHierBlock.get(1);
        if( result == null){
            // the first line should be the root taxon
             ConcretRoot root = new ConcretRoot<MCTaxon>(new MCTaxon(Integer.parseInt(values[0]), values[2], values[3]) );
             result = new MultiClassifierResult(root);
             result.addSampleList(curSampleList);
        }
        MCTaxon curTaxon = (MCTaxon)(result.getRoot().getRootTaxonHodler().getTaxon());
        // add counts
        for ( int i = offset; i< values.length; i++){
            curTaxon.incCount(curSampleList.get(i-offset), Double.parseDouble(values[i]));
        }
       
        for ( int ln = 2; ln < oneHierBlock.size(); ln++){
            values = oneHierBlock.get(ln);
            int taxid = Integer.parseInt(values[0]);
           
            // find the parent taxon
            String[] lineage = values[1].split(";");
            TaxonHolder parentTaxon = result.getRoot().getRootTaxonHodler();
            for ( int i= 2; i< lineage.length-2; i+=2){ // the first taxon should be the root
                TaxonHolder temp = parentTaxon.getImediateChildTaxon(lineage[i]);
                if ( temp == null){
                    throw new IOException("Error: Something is wrong with input file, can not find parent node " + lineage[i] + " in line: "+ values[1] );
                }
                parentTaxon = temp;
            }

            // check if the name and rank match the existing one, in case the results come from a different version
            TaxonHolder tempChild = parentTaxon.getImediateChildTaxon(values[2]);
            if ( tempChild != null){
                curTaxon = (MCTaxon) tempChild.getTaxon();

                if ( curTaxon.getTaxid() != taxid ){
                    throw new IOException("Error: Something is wrong with input file: taxon name " + values[2] + " with taxid " + taxid
                    + " does not match previous processed taxon " + curTaxon.getName() + " with taxid " + curTaxon.getTaxid()
                    + ". Possibly from different training sets ??");
                }
            }else {
                curTaxon = new MCTaxon(taxid, values[2], values[3], false);
                result.getRoot().addChild(curTaxon, parentTaxon.getTaxon().getTaxid());
                curTaxon.setLineage(values[1]);
            }
            // add counts
            for ( int i = offset; i< values.length; i++){
                curTaxon.incCount(curSampleList.get(i-offset), Double.parseDouble(values[i]));
            }
        }

    }
View Full Code Here

TOP

Related Classes of edu.msu.cme.rdp.multicompare.taxon.MCTaxon

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.