Examples of edu.msu.cme.rdp.readseq.utils.orientation.GoodWordIterator

Package edu.msu.cme.rdp.readseq.utils.orientation

Examples of edu.msu.cme.rdp.readseq.utils.orientation.GoodWordIterator

edu.msu.cme.rdp.readseq.utils.orientation.GoodWordIterator

            leaveCount = 1;
        } else {
            leaveCount++;
        }


        GoodWordIterator iterator = new GoodWordIterator(pSeq.getSeqString());
        if (wordOccurrence == null) {
            wordOccurrence = new int[iterator.getMask() + 1];
        }


        // create a temporary list and initialize the value to be -1;
        int[] wordList = new int[iterator.getNumofWords()];


        for (int i = 0; i < wordList.length; i++) {
            wordList[i] = -1;
        }


        int numUniqueWords = 0;  // indicate the number of unique words
        // duplicated words in one sequence are only counted once
        while (iterator.hasNext()) {
            int index = iterator.next();  // index is the actual integer representation of the word


            if (!isWordExist(wordList, index)) {
                wordList[numUniqueWords] = index;
                wordOccurrence[index]++;
                numUniqueWords++;

View Full Code Here

    }
    
  /**
   * Builds a word iterator over a randomly positioned stretch of this sequence
   * made up of {@code num_good_bases} consecutive good bases.
   *
   * <p>The sequence string is rotated at a random location, with {@code marker}
   * placed at the join between the original tail and head, and is then scanned
   * from the start for the first run of {@code num_good_bases} characters for
   * which {@code GoodWordIterator.getCharIndex} does not return -1.
   *
   * @param num_good_bases required length of the run of good bases
   * @return an iterator over the selected subsequence, or {@code null} when the
   *         sequence is shorter than {@code num_good_bases}, when no such run
   *         is found, or when the selected subsequence yields no words
   * @throws IOException declared for the reader calls below (and presumably
   *         the GoodWordIterator constructor -- confirm against that class)
   */
  public GoodWordIterator getPartialSeqIteratorbyGoodBases(int num_good_bases) throws IOException{
    GoodWordIterator wordIterator = null;
    String sequence = super.getSeqString();
    int size = sequence.length();
    // sequence is too short to contain the requested run; wordIterator is still null here
    if ( size < num_good_bases) {
        return wordIterator;
    }


    // pick a random rotation point; loc falls in 0..size inclusive
    double d = Math.random();
    int loc = (int)Math.round((double)(size * d ) );
    // rotate: tail first, then marker, then head
    // NOTE(review): marker is presumably a character getCharIndex rejects, so a run
    // cannot span the join between tail and head -- confirm where marker is defined
    String newSeq = sequence.substring(loc,size) + marker + sequence.substring(0,loc);


    // length of the current run of consecutive good bases
    int numGoodBases = 0;


    StringReader in = new StringReader(newSeq);


    // number of characters of newSeq processed so far
    int offset = 0;


    int c;
    while ( (c = in.read()) != -1 ){
      // run already complete: stop before counting the character just read,
      // so offset still points just past the end of the run
      if ( numGoodBases == num_good_bases){
        break;
      }
      int charIndex = GoodWordIterator.getCharIndex(c);


      // a good base extends the run; anything else resets it
      if ( charIndex != -1){
        numGoodBases ++;
      }else {
        numGoodBases = 0;
      }


      offset ++;
    }


    in.close();
    // the loop can also end at end of input, so re-check that a full run was found
    if (numGoodBases == num_good_bases){
      // the run is the last num_good_bases characters processed
      String partialSeq = newSeq.substring(offset - num_good_bases, offset);
      //System.err.println(">" + this.getName() + " " + this.getLineage() + "\n" + partialSeq + " NUM_GOOD_BASES=" + numGoodBases);
      wordIterator = new GoodWordIterator(partialSeq);
      // an iterator without words is reported as null, same as "not found"
       if ( wordIterator.getNumofWords() == 0){
            wordIterator = null;
        }
    }


    return wordIterator;

View Full Code Here

        int i = 0;
        Iterator seqIt = seqList.iterator();
        while (seqIt.hasNext()) {
            LineageSequence pSeq = (LineageSequence) seqIt.next();


            GoodWordIterator wordIterator = getPartialSeqIteratorbyWindow(pSeq, window); // full sequence  


            if (wordIterator == null) {
                continue;
            }

View Full Code Here

        // at least half of the window size.


        if (seqString.length() < FindWindowFrame.window_size / 2) {
            return null;
        }
        GoodWordIterator wordIterator = new GoodWordIterator(seqString);
        if (wordIterator.getNumofWords() == 0) {
            wordIterator = null;
        }
        return wordIterator;
    }

View Full Code Here

            numOfLeaves++;
        }
        
        if ( !initWordOccurrence) return;


        GoodWordIterator iterator = new GoodWordIterator(pSeq.getSeqString());
        if (wordOccurrence == null) {
            wordOccurrence = new short[iterator.getMask() + 1];
        }


        // create a temporary list and initialize the value to be -1;
        int[] wordList = new int[iterator.getNumofWords()];


        for (int i = 0; i < wordList.length; i++) {
            wordList[i] = -1;
        }


        int numUniqueWords = 0;  // indicate the number of unique words
        // duplicated words in one sequence are only counted once
        while (iterator.hasNext()) {
            int index = iterator.next();  // index is the actual integer representation of the word
            
            if (!isWordExist(wordList, index)) {
                wordList[numUniqueWords] = index;
                wordOccurrence[index]++;
                numUniqueWords++;

View Full Code Here

        while (parser.hasNext()) {
            LineageSequence pSeq = parser.next();
            if (pSeq.getSeqString().length() == 0) {
                continue;
            }
            GoodWordIterator wordIterator = null;
            if (numGoodBases > 0) {
                wordIterator = pSeq.getPartialSeqIteratorbyGoodBases(numGoodBases);  // test partial sequences with good words only


            } else {
                wordIterator = new GoodWordIterator(pSeq.getSeqString()); // full sequence  
            }


            if (wordIterator == null || wordIterator.getNumofWords() == 0) {
                //System.err.println(pSeq.getSeqName() + " unable to find good subsequence with length " + numGoodBases);
                continue;
            }


            //for leave-one-out testing, we need to remove the word occurrence for

View Full Code Here

            totalSeq ++;
          LineageSequence pSeq = parser.next();
          if ( !selectedTestSeqIDs.contains(pSeq.getSeqName()) || pSeq.getSeqString().length() == 0){
              continue;
          }
          GoodWordIterator wordIterator = null ;
          if ( partialLength != null ){
                wordIterator = pSeq.getPartialSeqIteratorbyGoodBases(partialLength.intValue());  // test partial sequences with good words only


          }else {
                wordIterator = new GoodWordIterator(pSeq.getSeqString()); // full sequence
          }


          if (wordIterator == null || wordIterator.getNumofWords() == 0){
            //System.err.println(pSeq.getSeqName() + " unable to find good sequence");
            continue;
          }
        
          List result = dm.getBestClasspath( wordIterator, genusNodeMap, useSeed, min_bootstrap_words);

View Full Code Here




        //test hide and unhide sequence 1, which belongs to child1
        String seq1 = "AAAAAAAAAGUCACCCCCCCCUGA";     // belong to child1      


        GoodWordIterator seq_iterator = new GoodWordIterator(seq1);
        assertEquals(2, c1.getNumOfLeaves());
        c1.hideSeq(seq_iterator);
        assertEquals(1, c1.getNumOfLeaves());
        assertEquals(1, c2.getNumOfLeaves());
        assertEquals(2, root.getNumOfLeaves());


        assertEquals(0, c1.getWordOccurrence(0)); //AAAAAAAA
        assertEquals(1, c1.getWordOccurrence(2)); //AAAAAAAG
        // assertEquals(2, root.getWordOccurrence(39) );   //AAAAAGUC


        c1.unhideSeq(seq_iterator);
        assertEquals(2, c1.getNumOfLeaves());
        assertEquals(3, root.getNumOfLeaves());


        assertEquals(1, c1.getWordOccurrence(0)); //AAAAAAAA     
        // assertEquals(2, root.getWordOccurrence(65535) );   //CCCCCCCC


        //test hide and unhide sequence 2, which belongs to child2
        String seq2 = "AAAAUAAAAAGUCCCCCCCCUG"; // belong to child2
        seq_iterator = new GoodWordIterator(seq2);
        c2.hideSeq(seq_iterator);
        assertEquals(2, c1.getNumOfLeaves());
        assertEquals(0, c2.getNumOfLeaves());
        assertEquals(2, root.getNumOfLeaves());

View Full Code Here

        
        // test the first sequence
        File queryReader = new File(System.class.getResource("/test/classifier/testNBClassifierSet.fasta").getFile());        
        parser = new LineageSequenceParser(queryReader);
        LineageSequence pSeq = parser.next();
        GoodWordIterator iterator = new GoodWordIterator(pSeq.getSeqString());
        NBClassifier classifier = new NBClassifier(factory, iterator.getWordArr(), nodeList, useSeed, min_bootstrap_words);


        assertEquals(pSeq.getSeqName(), "XG1_child1");        
        ValidationClassificationResult result = classifier.assignClass();
        assertEquals("G1", ((HierarchyTree) result.getBestClass()).getName());
        assertTrue(0.1 > result.getPosteriorProb());




        pSeq = parser.next();
        // test the 3rd getSequence()
        pSeq = parser.next();
        assertEquals(pSeq.getSeqName(), "XG2_child1");
        iterator = new GoodWordIterator(pSeq.getSeqString());
        classifier = new NBClassifier(factory, iterator.getWordArr(), nodeList, useSeed, min_bootstrap_words);
        result = classifier.assignClass();
        assertEquals("G2", ((HierarchyTree) result.getBestClass()).getName());
        assertTrue(0.2 > result.getPosteriorProb());




        pSeq = parser.next();
        // test the 5th getSequence()
        pSeq = parser.next();
        assertEquals(pSeq.getSeqName(), "XPh2G6_child1");
        iterator = new GoodWordIterator(pSeq.getSeqString());
        classifier = new NBClassifier(factory, iterator.getWordArr(), nodeList, useSeed, min_bootstrap_words);
        result = classifier.assignClass();
        assertEquals("G1", ((HierarchyTree) result.getBestClass()).getName());
        assertTrue(0.2 > result.getPosteriorProb());


        //test the 8th sequence in G7, it is the same as the 9th sequence in G8
        // the classifier should randomly choose a genus (either G7 or G8) because the score will be tie
        parser.next();
        parser.next();
        pSeq = parser.next();
        assertEquals(pSeq.getSeqName(), "XPh2G7_child1");
        iterator = new GoodWordIterator(pSeq.getSeqString());
        int G7_count = 0;
        int G8_count = 0;
        for ( int run = 0; run < DecisionMaker.NUM_OF_RUNS; run++){
            classifier = new NBClassifier(factory, iterator.getWordArr(), nodeList, useSeed, min_bootstrap_words);
            result = classifier.assignClass();
            if ( ((HierarchyTree) result.getBestClass()).getName().equals("G7")){
                G7_count ++;
            }else if ( ((HierarchyTree) result.getBestClass()).getName().equals("G8")){
                G8_count ++;

View Full Code Here

  private String seq = "AAAAAAAAAG-CCCCCCCCUGAGGGUUACnAA";
  private GoodWordIterator wordIt;
  
  public GoodWordIteratorTest(java.lang.String testName) throws IOException{
    super(testName);    
    wordIt = new GoodWordIterator(seq);
  }

View Full Code Here

0 1

TOP

Related Classes of edu.msu.cme.rdp.readseq.utils.orientation.GoodWordIterator

edu.msu.cme.rdp.classifier.train.GoodWordIteratorTest

edu.msu.cme.rdp.classifier.train.LineageSequence

edu.msu.cme.rdp.classifier.train.RawHierarchyTree

edu.msu.cme.rdp.classifier.train.validation.crossvalidate.CrossValidate

edu.msu.cme.rdp.classifier.train.validation.GoodWordIteratorTest

edu.msu.cme.rdp.classifier.train.validation.HierarchyTree

edu.msu.cme.rdp.classifier.train.validation.HierarchyTreeTest

edu.msu.cme.rdp.classifier.train.validation.leaveoneout.LeaveOneOutTester

edu.msu.cme.rdp.classifier.train.validation.movingwindow.WindowTester

edu.msu.cme.rdp.classifier.train.validation.NBClassifierTest

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.