Package joshua.prefix_tree

Source Code of joshua.prefix_tree.LMAdaptingRuleExtractor

/* This file is part of the Joshua Machine Translation System.
*
* Joshua is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*/
package joshua.prefix_tree;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Map;

import joshua.corpus.Corpus;
import joshua.corpus.Phrase;
import joshua.corpus.alignment.Alignments;
import joshua.corpus.lexprob.LexicalProbabilities;
import joshua.corpus.suffix_array.HierarchicalPhrase;
import joshua.corpus.suffix_array.Pattern;
import joshua.corpus.suffix_array.Suffixes;
import joshua.corpus.vocab.SymbolTable;
import joshua.corpus.vocab.Vocabulary;
import joshua.decoder.JoshuaConfiguration;
import joshua.decoder.ff.FeatureFunction;
import joshua.decoder.ff.lm.NGramLanguageModel;
import joshua.decoder.ff.lm.buildin_lm.LMGrammarJAVA;

/**
*
*
* @author Lane Schwartz
*/
public class LMAdaptingRuleExtractor extends HierarchicalRuleExtractor {

  final float[] weights;
 
  /**
     * Constructs a rule extractor for
     * Hiero-style hierarchical phrase-based translation.
   *
   * @param suffixArray        Suffix array representing the
   *                           source language corpus
   * @param targetCorpus       Corpus array representing the
   *                           target language corpus
   * @param alignments         Represents alignments between words in the
   *                           source corpus and the target corpus
   * @param lexProbs           Lexical translation probability table
   * @param sampleSize         Specifies the maximum number of rules
   *                           that will be extracted for any source pattern
   * @param maxPhraseSpan      Max span in the source corpus of any
   *                           extracted hierarchical phrase
   * @param maxPhraseLength    Maximum number of terminals plus nonterminals
   *                           allowed in any extracted hierarchical phrase
   * @param minNonterminalSpan Minimum span in the source corpus of any
   *                           nonterminal in an extracted hierarchical
   *                           phrase
   * @param maxNonterminalSpan Maximum span in the source corpus of any
   *                           nonterminal in an extracted hierarchical
   *                           phrase
   * @throws IOException
   */
  public LMAdaptingRuleExtractor(
      String largeArpaLM, String testArpaLM, int lmOrder,
      Suffixes suffixArray,
      Suffixes targetSuffixArray,
      Alignments alignments,
      LexicalProbabilities lexProbs,
      ArrayList<FeatureFunction> models,
      int sampleSize,
      int maxPhraseSpan,
      int maxPhraseLength,
      int minNonterminalSpan,
      int maxNonterminalSpan) throws IOException {
   
    super(suffixArray,
        targetSuffixArray, alignments,
        lexProbs, models, sampleSize,
        maxPhraseSpan, maxPhraseLength,
        minNonterminalSpan, maxNonterminalSpan);
   
    SymbolTable vocab = new Vocabulary();
   
    Corpus corpus = suffixArray.getCorpus();
   
    NGramLanguageModel largeLM = new LMGrammarJAVA(
        vocab,
        lmOrder,
        largeArpaLM,
        JoshuaConfiguration.use_left_equivalent_state,
        JoshuaConfiguration.use_right_equivalent_state);
   
    NGramLanguageModel testLM = new LMGrammarJAVA(
        vocab,
        lmOrder,
        testArpaLM,
        JoshuaConfiguration.use_left_equivalent_state,
        JoshuaConfiguration.use_right_equivalent_state);
   
    this.weights = new float[corpus.getNumSentences()];
   
    for (int i=0, n=corpus.getNumSentences(); i<n; i++) {
      Phrase sentence = corpus.getSentence(i);
      int[] words = sentence.getWordIDs();
      double largeProbLM = largeLM.ngramLogProbability(words);
      double testProbLM = testLM.ngramLogProbability(words);
      double ratio = testProbLM - largeProbLM;
      this.weights[i] = (float) ratio;
    }
  }
 
  @Override
  protected float[] calculateFeatureValues(Pattern sourcePattern, int sourcePatternCount, HierarchicalPhrase translation, Map<Pattern,Integer> counts, float totalTranslationCount) {
    float[] featureValues = super.calculateFeatureValues(sourcePattern, sourcePatternCount, translation, counts, totalTranslationCount);
   
    return featureValues;
  }
 
}
TOP

Related Classes of joshua.prefix_tree.LMAdaptingRuleExtractor

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.