Package edu.msu.cme.rdp.classifier.io

Source Code of edu.msu.cme.rdp.classifier.io.TreeFileParser

/*
* TreeFileParser.java
*
* Copyright 2006 Michigan State University Board of Trustees
*
* Created on September 11, 2003, 11:12 AM
*/
package edu.msu.cme.rdp.classifier.io;

import edu.msu.cme.rdp.classifier.HierarchyTree;
import edu.msu.cme.rdp.classifier.TrainingDataException;
import edu.msu.cme.rdp.classifier.utils.HierarchyVersion;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Stack;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.ParserConfigurationException;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
* A parser to parse a reader containing taxonomic training information.
* Note: The first TreeNode is the root TreeNode.
* @author  wangqion
*/
public class TreeFileParser extends org.xml.sax.helpers.DefaultHandler {

    private Stack treeNodeStack = new Stack();
    private HierarchyTree root;
    private String trainRank = null;

    /** Creates a new instance of TreeFileParser. */
    public TreeFileParser() {
    }
   
    public String getTrainRank(){
        return trainRank;
    }

    /** Reads from a reader that contains the information for each treenode.
     * Creates all the HierarchyTrees and returns the root of the trees.
     * Note: The first TreeNode is the root TreeNode.
     * The version information should be obtained from the other files first.
     */
    public HierarchyTree createTree(Reader in, HierarchyVersion version) throws IOException, TrainingDataException {
        BufferedReader infile = new BufferedReader(in);
        // the first line contains the version information
        // check if it's the same as version from the other training files

        String line = infile.readLine();
        if (line != null) {
            HierarchyVersion thisVersion = new HierarchyVersion(line);
            int trainsetNo = thisVersion.getTrainsetNo();

            if (thisVersion.getVersion() == null) {
                throw new TrainingDataException("Error: There is no version information "
                        + "in the bergeyTree file");
            }
            if (version == null) {
                version = thisVersion;
            } else if (!version.getVersion().equals(thisVersion.getVersion()) || version.getTrainsetNo() != thisVersion.getTrainsetNo()) {
                throw new TrainingDataException("Error: The version information in the bergeyTree file is different from the version of the other training files.");
            }
        }

        while ((line = infile.readLine()) != null) {
            load(line);
        }
        infile.close();
        return root;
    }

    private void load(String document) throws TrainingDataException, IOException {
        try {
            SAXParserFactory factory = SAXParserFactory.newInstance();
            SAXParser saxParser = factory.newSAXParser();
            saxParser.parse(new InputSource(new StringReader(document)), this);
        } catch (ParserConfigurationException e) {
            throw new TrainingDataException(e);
        } catch (SAXException e) {
            throw new TrainingDataException(e);
        }
    }

    public void startElement(String namespaceURI,
            String lName, // local name
            String qName, // qualified name
            Attributes attrs) throws SAXException {
        try {
            // the older training file does not contain the copy number info.
            if (attrs == null || (attrs.getLength() != 6 && attrs.getLength() != 7)) {
                throw new TrainingDataException("Error: the attribute for element: "
                        + qName + " is missing or do not have exactly number of attributes");
            }
            int taxid = Integer.parseInt(attrs.getValue(1));
            int parentTaxid = Integer.parseInt(attrs.getValue(3));
            int leaveCount = Integer.parseInt(attrs.getValue(4));
            int genusIndex = Integer.parseInt(attrs.getValue(5));
            double copyNumber = 0.0f
            if ( attrs.getLength() > 6){
                copyNumber = Double.parseDouble(attrs.getValue(6));
            }

            HierarchyTree aTree = new HierarchyTree(attrs.getValue(0), taxid, attrs.getValue(2), leaveCount, genusIndex, copyNumber);
            // The first TreeNode is the root
            if (root == null) {
                aTree.addParent(null);
                root = aTree;
            } else {
                HierarchyTree parent = null;
                while (!treeNodeStack.empty()) {
                    HierarchyTree topNode = (HierarchyTree) treeNodeStack.peek();
                    if (topNode.getTaxid() == parentTaxid) {
                        parent = topNode;
                        break;
                    }
                    treeNodeStack.pop();
                }
                if (parent == null) {
                    throw new TrainingDataException("Error: The parent for treenode name=: "
                            + attrs.getValue(0) + " rank=" + attrs.getValue(2) + " parentTaxid=" + parentTaxid
                            + " can not be found in the input file");
                }
                //System.err.println("parent: " + parent.getName() + " root=" + root.getName());
                aTree.addParent(parent);
            }

            // if this node is not genus node, push it to the stack
            if (genusIndex == -1) {
                treeNodeStack.push(aTree);
            }else if ( trainRank == null){
                trainRank = attrs.getValue(2);
            }
        } catch (TrainingDataException e) {
            throw new SAXException(e);
        }
    }

    public void endElement(String str, String str1, String str2) throws org.xml.sax.SAXException {
    }

    public void error(org.xml.sax.SAXParseException e) throws org.xml.sax.SAXException {
        throw new SAXException(e);
    }
}
TOP

Related Classes of edu.msu.cme.rdp.classifier.io.TreeFileParser

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.