Package edu.msu.cme.rdp.classifier.train

Source Code of edu.msu.cme.rdp.classifier.train.LineageSequence

/*
* LineageSequence.java
*
* Copyright 2006 Michigan State University Board of Trustees
*
* Created on June 25, 2002, 10:28 AM
*/
package edu.msu.cme.rdp.classifier.train;

import edu.msu.cme.rdp.readseq.readers.Sequence;
import edu.msu.cme.rdp.readseq.utils.orientation.GoodWordIterator;
import java.io.IOException;
import java.io.StringReader;
import java.util.List;


/**
* A ParsedRawSequence holds the data for a raw sequence.
* @author  wangqion
* @version
*/
public class LineageSequence extends Sequence {

    private List<String> ancestors;  // the highest ranked ancestor first if it's the complete lineage, or hold the taxid of the immediate parent taxon
    private static final String marker = "#";

    /** Creates new ParsedRawSequence. */
    public LineageSequence(String n, List<String> al, String seq) {
        super(n, "", seq);
        ancestors = al;
        if(ancestors.isEmpty()) {
            throw new IllegalArgumentException("No lineage data for sequence " + n);
        }
    }

    /**
     * Returns the list of the ancestor taxa, with the highest ranked taxon first.
     */
    public List<String> getAncestors() {
        return ancestors;
    }
   
    public String getDesc(){
        StringBuilder desc = new StringBuilder();
        desc.append(ancestors.get(0));
        for ( int i = 1; i < ancestors.size(); i++){
            desc.append(";" + ancestors.get(i));
        }
        return desc.toString();
    }
   
  /* partial sequences with good words only
   */
  public GoodWordIterator getPartialSeqIteratorbyGoodBases(int num_good_bases) throws IOException{
    GoodWordIterator wordIterator = null;
    String sequence = super.getSeqString();
    int size = sequence.length();
    if ( size < num_good_bases) {
        return wordIterator;
    }

    double d = Math.random();
    int loc = (int)Math.round((double)(size * d ) );
    String newSeq = sequence.substring(loc,size) + marker + sequence.substring(0,loc);

    int numGoodBases = 0;

    StringReader in = new StringReader(newSeq);

    int offset = 0;

    int c;
    while ( (c = in.read()) != -1 ){
      if ( numGoodBases == num_good_bases){
        break;
      }
      int charIndex = GoodWordIterator.getCharIndex(c);

      if ( charIndex != -1){
        numGoodBases ++;
      }else {
        numGoodBases = 0;
      }

      offset ++;
    }

    in.close();
    if (numGoodBases == num_good_bases){
      String partialSeq = newSeq.substring(offset - num_good_bases, offset);
      //System.err.println(">" + this.getName() + " " + this.getLineage() + "\n" + partialSeq + " NUM_GOOD_BASES=" + numGoodBases);
      wordIterator = new GoodWordIterator(partialSeq);
       if ( wordIterator.getNumofWords() == 0){
            wordIterator = null;
        }
    }

    return wordIterator;

  }
}
TOP

Related Classes of edu.msu.cme.rdp.classifier.train.LineageSequence

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.