Examples of HierarchicalPhrase

joshua.corpus.suffix_array.HierarchicalPhrase
HierarchicalPhrase is a class that represents a single matched hierarchical phrase, and provides the methods necessary for accessing the Pattern that it matches and for computing the intersection used in the suffix array lookups of discontinuous phrases.
For efficiency and space savings, this class should only be used when it does not make sense to use MatchedHierarchicalPhrases. In cases where many hierarchical phrases share a common pattern, that class should be preferred, as it is able to store the phrases in a much more memory-efficient manner compared to storing a collection of HierarchicalPhrase objects. @author Chris Callison-Burch and Lane Schwartz @since July 31 2008 @version $LastChangedDate:2008-09-18 12:47:23 -0500 (Thu, 18 Sep 2008) $

Examples of joshua.corpus.suffix_array.HierarchicalPhrase


        for (int i=targetSpan.start; i<targetSpan.end; i++) {
          words[i-targetSpan.start] = targetCorpus.getWordID(i);
        }
        
        return new HierarchicalPhrase(
            words, 
            targetSpan,
            Collections.<LabeledSpan>emptyList(),
            targetCorpus);
      }
    }


    
    // Handle the more complex cases...
    List<LabeledSpan> targetNTSpans = new ArrayList<LabeledSpan>();
    int patternSize = targetSpan.size();
    
    int ntIndex = 0;
    
    // For each non terminal in the source, find their corresponding positions in the target span... 
    
    // If the source phrase starts with a nonterminal, we have to handle that NT as a special case
    if (sourceStartsWithNT) {
      
      int firstTerminalIndex = sourcePhrases.getFirstTerminalIndex(sourcePhraseIndex);
      
      if (firstTerminalIndex - sourceSpan.start < minNonterminalSpan) {
        
        return null;
        
      } else {
        // If the source phrase starts with NT, then we need to calculate the span of the first NT
        Span nonterminalSourceSpan = new Span(sourceSpan.start, firstTerminalIndex);
        Span nonterminalTargetSpan = alignments.getConsistentTargetSpan(nonterminalSourceSpan);


        if (nonterminalTargetSpan==null || nonterminalTargetSpan.equals(targetSpan)) return null;


        targetNTSpans.add(new LabeledSpan(nonterminalIDs[ntIndex],nonterminalTargetSpan));
        ntIndex++;
        // the pattern length will be reduced by the length of the non-terminal, and increased by 1 for the NT itself.
        patternSize = patternSize - nonterminalTargetSpan.size() +1;
      }
    }
    
    // Process all internal nonterminals
    for (int i=0, n=sourcePhrases.getNumberOfTerminalSequences()-1; i<n; i++) {
      
      int nextStartIndex = 
        sourcePhrases.getTerminalSequenceStartIndex(sourcePhraseIndex, i+1);
      
      int currentEndIndex =
        sourcePhrases.getTerminalSequenceEndIndex(sourcePhraseIndex, i);
      
      if (nextStartIndex - currentEndIndex < minNonterminalSpan) {
        
        return null;
        
      } else {
        
        Span nonterminalSourceSpan = new Span(currentEndIndex, nextStartIndex);


        Span nonterminalTargetSpan = alignments.getConsistentTargetSpan(nonterminalSourceSpan);


        if (nonterminalTargetSpan==null || nonterminalTargetSpan.equals(targetSpan)) return null;


        targetNTSpans.add(new LabeledSpan(nonterminalIDs[ntIndex],nonterminalTargetSpan));
        ntIndex++;
        patternSize = patternSize - nonterminalTargetSpan.size() + 1;
        
      }
    }
      
    // If the source phrase ends with a nonterminal, we have to handle that NT as a special case
    if (sourceEndsWithNT) {
      
      int lastTerminalIndex = sourcePhrases.getLastTerminalIndex(sourcePhraseIndex);
      
      if (sourceSpan.end - lastTerminalIndex < minNonterminalSpan) {
        
        return null;
        
      } else {


        // If the source phrase ends with NT, then we need to calculate the span of the last NT
        Span nonterminalSourceSpan = new Span(lastTerminalIndex, sourceSpan.end);


        Span nonterminalTargetSpan = alignments.getConsistentTargetSpan(nonterminalSourceSpan);
        if (logger.isLoggable(Level.FINEST)) logger.finest("Consistent target span " + nonterminalTargetSpan + " for NT source span " + nonterminalSourceSpan);




        if (nonterminalTargetSpan==null || nonterminalTargetSpan.equals(targetSpan)) return null;


        targetNTSpans.add(new LabeledSpan(nonterminalIDs[ntIndex],nonterminalTargetSpan));
        ntIndex++;
        patternSize = patternSize - nonterminalTargetSpan.size() + 1;


      }
    }
    
    boolean foundAlignedTerminal = false;
    
    // Create the pattern...
    int[] words = new int[patternSize];
    int patterCounter = 0;
    
    Collections.sort(targetNTSpans);
    
    if (targetNTSpans.get(0).getSpan().start == targetSpan.start) {
      
      int ntCumulativeSpan = 0;
      
      for (LabeledSpan span : targetNTSpans) {
        ntCumulativeSpan += span.size();
      }
      
      if (ntCumulativeSpan >= targetSpan.size()) {
        return null;
      }
      
    } else {
      // if we don't start with a non-terminal, then write out all the words
      // until we get to the first non-terminal
      for (int i = targetSpan.start; i < targetNTSpans.get(0).getSpan().start; i++) {
        if (!foundAlignedTerminal) {
          foundAlignedTerminal = alignments.hasAlignedTerminal(i, sourcePhrases, sourcePhraseIndex);
        }
        words[patterCounter] = targetCorpus.getWordID(i);
        patterCounter++;
      }
    }


    // add the first non-terminal
    words[patterCounter] = targetNTSpans.get(0).getLabel();
    patterCounter++;
    
    // add everything until the final non-terminal
    for(int i = 1; i < targetNTSpans.size(); i++) {
      LabeledSpan NT1 = targetNTSpans.get(i-1);
      LabeledSpan NT2 = targetNTSpans.get(i);
      
      for(int j = NT1.getSpan().end; j < NT2.getSpan().start; j++) {
        if (!foundAlignedTerminal) {
          foundAlignedTerminal = alignments.hasAlignedTerminal(j, sourcePhrases, sourcePhraseIndex);
        }
        words[patterCounter] = targetCorpus.getWordID(j);
        patterCounter++;
      }
      words[patterCounter] = NT2.getLabel();
      patterCounter++;
    }
    
    // if we don't end with a non-terminal, then write out all remaining words
    if(targetNTSpans.get(targetNTSpans.size()-1).getSpan().end != targetSpan.end) {
      // the target pattern does not end with a non-terminal
      for(int i = targetNTSpans.get(targetNTSpans.size()-1).getSpan().end; i < targetSpan.end; i++) {
        if (!foundAlignedTerminal) {
          foundAlignedTerminal = alignments.hasAlignedTerminal(i, sourcePhrases, sourcePhraseIndex);
        }
        words[patterCounter] = targetCorpus.getWordID(i);
        patterCounter++;
      }
    }
    
    if (foundAlignedTerminal) {
      return new HierarchicalPhrase(
          words, 
          targetSpan,
          targetNTSpans,
          targetCorpus);
    } else {

View Full Code Here

Examples of joshua.corpus.suffix_array.HierarchicalPhrase

    ArrayList<HierarchicalPhrase> translations = new ArrayList<HierarchicalPhrase>();
    
    // For each sample HierarchicalPhrase
    for (int i=0, n=sourceHierarchicalPhrases.size(); i<n; i+=stepSize) { 


      HierarchicalPhrase translation = getTranslation(sourceHierarchicalPhrases, i);
      if (translation != null) {
        translations.add(translation);
      }
    }

View Full Code Here

Examples of joshua.corpus.suffix_array.HierarchicalPhrase

      
      // If target span and source span are consistent
      if (targetSpan!=null && targetSpan.size()>=sourcePhrase.arity()+1 && targetSpan.size()<=maxPhraseSpan) {
        
        // Construct a translation
        HierarchicalPhrase translation = constructTranslation(sourcePhrase, sourcePhraseIndex, sourceSpan, targetSpan, false, false);
        
        if (translation != null) {
          if (logger.isLoggable(Level.FINEST)) logger.finest("\tCase 1: Adding translation: '" + translation + "' for target span " + targetSpan + " from source span " + sourceSpan);


          return translation;
        } else if (logger.isLoggable(Level.FINER)) {
          logger.finer("No valid translation returned from attempt to construct translation for source span " + sourceSpan + ", target span " + targetSpan);
        }
        
      }
      
    }
    
    // Case 2: If sourcePhrase startsWithNT && !endsWithNT
    else if (sourcePhrase.startsWithNonterminal() && !sourcePhrase.endsWithNonterminal()) {
      
      if (logger.isLoggable(Level.FINER)) logger.finer("Case 2: Source phrase startsWithNT && !endsWithNT");
      
      int sentenceNumber = sourcePhrase.getSentenceNumber(sourcePhraseIndex);
      int startOfSentence = sourceSuffixArray.getCorpus().getSentencePosition(sentenceNumber);
      int startOfTerminalSequence = sourcePhrase.getFirstTerminalIndex(sourcePhraseIndex);
      int endOfTerminalSequence = sourcePhrase.getLastTerminalIndex(sourcePhraseIndex);
      
      // Start by assuming the initial source nonterminal starts one word before the first source terminal 
      Span possibleSourceSpan = new Span(startOfTerminalSequence-1, endOfTerminalSequence);
      
      // Loop over all legal source spans 
      //      (this is variable because we don't know the length of the NT span)
      //      looking for a source span with a consistent translation
      while (possibleSourceSpan.start >= startOfSentence && 
          startOfTerminalSequence-possibleSourceSpan.start<=maxNonterminalSpan && 
          endOfTerminalSequence-possibleSourceSpan.start<=maxPhraseSpan) {
        
        // Get target span
        Span targetSpan = alignments.getConsistentTargetSpan(possibleSourceSpan);


        // If target span and source span are consistent
        if (targetSpan!=null && targetSpan.size()>=sourcePhrase.arity()+1 && targetSpan.size()<=maxPhraseSpan) {


          // Construct a translation
          HierarchicalPhrase translation = constructTranslation(sourcePhrase, sourcePhraseIndex, possibleSourceSpan, targetSpan, true, false);


          if (translation != null) {
            if (logger.isLoggable(Level.FINEST)) logger.finest("\tCase 2: Adding translation: '" + translation + "' for target span " + targetSpan + " from source span " + possibleSourceSpan);


            return translation;
          }


        } 
        
        possibleSourceSpan.start--;
        
      }
      
    }
    
    // Case 3: If sourcePhrase !startsWithNT && endsWithNT
    else if (!sourcePhrase.startsWithNonterminal() && sourcePhrase.endsWithNonterminal()) {
      
      if (logger.isLoggable(Level.FINER)) logger.finer("Case 3: Source phrase !startsWithNT && endsWithNT");
      
      int endOfSentence = sourceSuffixArray.getCorpus().getSentenceEndPosition(sourcePhrase.getSentenceNumber(sourcePhraseIndex));
      int startOfTerminalSequence = sourcePhrase.getFirstTerminalIndex(sourcePhraseIndex);
      int endOfTerminalSequence = sourcePhrase.getLastTerminalIndex(sourcePhraseIndex);
      
      // Start by assuming the initial source nonterminal starts one word after the last source terminal 
      Span possibleSourceSpan = 
        new Span(startOfTerminalSequence, endOfTerminalSequence+1);
        
      // Loop over all legal source spans 
      //      (this is variable because we don't know the length of the NT span)
      //      looking for a source span with a consistent translation
      while (possibleSourceSpan.end <= endOfSentence && 
          possibleSourceSpan.end - endOfTerminalSequence <= maxNonterminalSpan &&
          possibleSourceSpan.size()<=maxPhraseSpan) {
          
        // Get target span
        Span targetSpan = alignments.getConsistentTargetSpan(possibleSourceSpan);


        // If target span and source span are consistent
        if (targetSpan!=null && targetSpan.size()>=sourcePhrase.arity()+1 && targetSpan.size()<=maxPhraseSpan) {


          // Construct a translation
          HierarchicalPhrase translation = constructTranslation(sourcePhrase, sourcePhraseIndex, possibleSourceSpan, targetSpan, false, true);


          if (translation != null) {
            if (logger.isLoggable(Level.FINEST)) logger.finest("\tCase 3: Adding translation: '" + translation + "' for target span " + targetSpan + " from source span " + possibleSourceSpan);


            return translation;
          }


        } 
        
        possibleSourceSpan.end++;
        
      }
      
    }
    
    // Case 4: If sourcePhrase startsWithNT && endsWithNT
    else if (sourcePhrase.startsWithNonterminal() && sourcePhrase.endsWithNonterminal()) {
      
      if (logger.isLoggable(Level.FINER)) logger.finer("Case 4: Source phrase startsWithNT && endsWithNT");
      
      int sentenceNumber = sourcePhrase.getSentenceNumber(sourcePhraseIndex);
      int startOfSentence = sourceSuffixArray.getCorpus().getSentencePosition(sentenceNumber);
      int endOfSentence = sourceSuffixArray.getCorpus().getSentenceEndPosition(sentenceNumber);
      int startOfTerminalSequence = sourcePhrase.getFirstTerminalIndex(sourcePhraseIndex);
      int endOfTerminalSequence = sourcePhrase.getLastTerminalIndex(sourcePhraseIndex);
      
      // Start by assuming the initial source nonterminal 
      //   starts one word before the first source terminal and
      //   ends one word after the last source terminal 
      Span possibleSourceSpan =
        new Span(startOfTerminalSequence-1, endOfTerminalSequence+1);
        
      // Loop over all legal source spans 
      //      (this is variable because we don't know the length of the NT span)
      //      looking for a source span with a consistent translation
      while (possibleSourceSpan.start >= startOfSentence && 
          possibleSourceSpan.end <= endOfSentence && 
          startOfTerminalSequence-possibleSourceSpan.start<=maxNonterminalSpan && 
          possibleSourceSpan.end-endOfTerminalSequence<=maxNonterminalSpan &&
          possibleSourceSpan.size()<=maxPhraseSpan) {
    
        // Get target span
        Span targetSpan = alignments.getConsistentTargetSpan(possibleSourceSpan);


        // If target span and source span are consistent
        if (targetSpan!=null && targetSpan.size()>=sourcePhrase.arity()+1 && targetSpan.size()<=maxPhraseSpan) {


          // Construct a translation
          HierarchicalPhrase translation = constructTranslation(sourcePhrase, sourcePhraseIndex, possibleSourceSpan, targetSpan, true, true);


          if (translation != null) {
            if (logger.isLoggable(Level.FINEST)) logger.finest("\tCase 4: Adding translation: '" + translation + "' for target span " + targetSpan + " from source span " + possibleSourceSpan);


            return translation;

View Full Code Here

Examples of joshua.corpus.suffix_array.HierarchicalPhrase


    int phraseIndex = 0;


    {
      HierarchicalPhrases phrases = getSourcePhrase("it", 0, 1);
      HierarchicalPhrase targetPhrase = getTargetPhrase("das", 0, 1);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 1.0f);  // lex P(it | das)
      Assert.assertEquals(targetGivenSource, 0.25f);// lex P(das | it)
    }


    {
      HierarchicalPhrases phrases = getSourcePhrase("makes", 1, 2);
      HierarchicalPhrase targetPhrase = getTargetPhrase("macht", 1, 2);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 1.0f); // lex P(makes | macht)
      Assert.assertEquals(targetGivenSource, 1.0f);// lex P(macht | makes)
    }


    {
      HierarchicalPhrases phrases = getSourcePhrase("him", 2, 3);
      HierarchicalPhrase targetPhrase = getTargetPhrase("ihn", 2, 3);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 1.0f);
      Assert.assertEquals(targetGivenSource, 1.0f);  
    }


    {
      HierarchicalPhrases phrases = getSourcePhrase("and", 3, 4);
      HierarchicalPhrase targetPhrase = getTargetPhrase("und", 3, 4);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 0.5f); // P(and | und)
      Assert.assertEquals(targetGivenSource, 1.0f);// P(und | and)
    }


    {
      HierarchicalPhrases phrases = getSourcePhrase("it", 4, 5);
      HierarchicalPhrase targetPhrase = getTargetPhrase("es", 4, 5);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 1.0f);  // lex P(it | es)
      Assert.assertEquals(targetGivenSource, 0.75f);// lex P(es | it)
    }


    {
      HierarchicalPhrases phrases = getSourcePhrase("mars", 5, 6);
      HierarchicalPhrase targetPhrase = getTargetPhrase("besch\u00E4digt", 5, 6);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 1.0f);
      Assert.assertEquals(targetGivenSource, 1.0f);  
    }


    {
      HierarchicalPhrases phrases = getSourcePhrase("him", 6, 7);
      HierarchicalPhrase targetPhrase = getTargetPhrase("ihn", 6, 7);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 1.0f);
      Assert.assertEquals(targetGivenSource, 1.0f);  
    }


    {
      HierarchicalPhrases phrases = getSourcePhrase(",", 7, 8);
      HierarchicalPhrase targetPhrase = getTargetPhrase(",", 7, 8);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 1.0f);
      Assert.assertEquals(targetGivenSource, 1.0f);  
    }


    {
      HierarchicalPhrases phrases = getSourcePhrase("it", 8, 9);
      HierarchicalPhrase targetPhrase = getTargetPhrase("es", 8, 9);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 1.0f);  // lex P(it | es)
      Assert.assertEquals(targetGivenSource, 0.75f);// lex P(es | it)
    }


    {
      HierarchicalPhrases phrases = getSourcePhrase("sets", 9, 10);
      HierarchicalPhrase targetPhrase = getTargetPhrase("setzt", 9, 10);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 1.0f);
      Assert.assertEquals(targetGivenSource, 1.0f);  
    }


    {
      HierarchicalPhrases phrases = getSourcePhrase("him", 10, 11);
      HierarchicalPhrase targetPhrase = getTargetPhrase("ihn", 10, 11);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 1.0f);
      Assert.assertEquals(targetGivenSource, 1.0f);  
    }


    {
      HierarchicalPhrases phrases = getSourcePhrase("on", 11, 12);
      HierarchicalPhrase targetPhrase = getTargetPhrase("auf", 11, 12);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 1.0f);
      Assert.assertEquals(targetGivenSource, 1.0f);  
    }


    {
      HierarchicalPhrases phrases = getSourcePhrase("yet", 12, 13);
      HierarchicalPhrase targetPhrase = getTargetPhrase("und", 12, 13);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 0.5f); // P(yet | und)
      Assert.assertEquals(targetGivenSource, 1.0f);// P(und | yet)
    }


    {
      HierarchicalPhrases phrases = getSourcePhrase("it", 13, 14);
      HierarchicalPhrase targetPhrase = getTargetPhrase("es", 13, 14);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 1.0f);  // lex P(it | es)
      Assert.assertEquals(targetGivenSource, 0.75f);// lex P(es | it)
    }


    {
      HierarchicalPhrases phrases = getSourcePhrase("takes", 14, 15);
      HierarchicalPhrase targetPhrase = getTargetPhrase("f\u00FChrt", 14, 15);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 1.0f);
      Assert.assertEquals(targetGivenSource, 1.0f);  
    }


    {
      HierarchicalPhrases phrases = getSourcePhrase("him", 15, 16);
      HierarchicalPhrase targetPhrase = getTargetPhrase("ihn", 15, 16);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 1.0f);
      Assert.assertEquals(targetGivenSource, 1.0f);  
    }


    {
      HierarchicalPhrases phrases = getSourcePhrase("off", 16, 17);
      HierarchicalPhrase targetPhrase = getTargetPhrase("aus", 16, 17);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 1.0f);
      Assert.assertEquals(targetGivenSource, 1.0f);  
    }


    {
      HierarchicalPhrases phrases = getSourcePhrase(".", 17, 18);
      HierarchicalPhrase targetPhrase = getTargetPhrase(".", 17, 18);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 1.0f);
      Assert.assertEquals(targetGivenSource, 1.0f);  
    }


    ///////////


    {
      HierarchicalPhrases phrases = getSourcePhrase("yet it", 12, 14);
      HierarchicalPhrase targetPhrase = getTargetPhrase("und es", 12, 14);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 0.5f * 1.0f);  // lex P(yet it | und es)
      Assert.assertEquals(targetGivenSource, 1.0f * 0.75f);// lex P(und es | yet it)
    }


    ///////////


    {
      HierarchicalPhrases phrases = getSourcePhrase("of the session", 19, 22);
      HierarchicalPhrase targetPhrase = getTargetPhrase("der sitzungsperiode", 19, 21);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 0.5f * 0.5f * 1.0f);  // lex P(of the session | der sitzungsperiode)
      Assert.assertEquals(targetGivenSource, 0.5f*((1.0f/3.0f) + (1.0f/3.0f)) * (1.0f/3.0f));// lex P(der sitzungsperiode | of the session)
    }


    {
      HierarchicalPhrases phrases = getSourcePhrase("thunder ; lightning", 29, 32);
      HierarchicalPhrase targetPhrase = getTargetPhrase("blitzen", 28, 29);
      float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase);
      float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase);
      Assert.assertEquals(sourceGivenTarget, 0.5f * (1.0f/3.0f) * 0.5f);  // lex P(thunder ; lightning | blitzen)
      Assert.assertEquals(targetGivenSource, ((1.0f/2.0f) * (1.0f + 1.0f)));// lex P(blitzen | thunder ; lightning)
    }

View Full Code Here

Examples of joshua.corpus.suffix_array.HierarchicalPhrase

        SymbolTable.X,
        targetVocab.getID("und"),
        targetVocab.getID("es")
      };
    
    HierarchicalPhrase targetPhrase = new HierarchicalPhrase(
        targetWords, 
        new Span(0,5), 
        Collections.<LabeledSpan>emptyList(), 
        targetCorpusArray);

View Full Code Here

Examples of joshua.corpus.suffix_array.HierarchicalPhrase

    return phrases;
  }
  
  private HierarchicalPhrase getTargetPhrase(String targetPhrase, int startIndex, int endIndex) {
    
    return new HierarchicalPhrase(
        targetVocab.getIDs(targetPhrase), 
        new Span(startIndex,endIndex), 
        Collections.<LabeledSpan>emptyList(), 
        targetCorpusArray);

View Full Code Here

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.