Package joshua.corpus.vocab

Examples of joshua.corpus.vocab.SymbolTable


   
    //??????????????????????????????????????
    int baseline_lm_feat_id = 0;
    //??????????????????????????????????????
   
    SymbolTable p_symbolTable = new BuildinSymbol(null);
   
    KBestExtractor kbest_extractor = null;
    int topN = 300;//TODO
    boolean extract_unique_nbest = true;//TODO
    boolean do_ngram_clip_nbest = true; //TODO
View Full Code Here


   
//    ????????????????????????????????????????????????????
    int ngramStateID = 0;
    //??????????????????????????????????????
   
    SymbolTable symbolTbl = new BuildinSymbol(null);
   
    boolean useIntegerString = false;
    boolean useRuleIDName = false;
   
   
View Full Code Here

    String featureFile = null;
    if(args.length>9)
      featureFile = args[9].trim();   
   
   
    SymbolTable symbolTbl = new BuildinSymbol(null);     
    List<FeatureFunction> features =  new ArrayList<FeatureFunction>();
   
   
   
    //=== baseline feature ====
View Full Code Here

   
    boolean addBaselineFeature = true;//TODO
    String baselineFeatureName = "baseline_lzf";//TODO
   
   
    SymbolTable p_symbol = new BuildinSymbol(null);
   
//    ##setup feature templates list
    ArrayList<FeatureTemplate> featureTemplates =  new ArrayList<FeatureTemplate>();
   
    boolean useIntegerString = false;
View Full Code Here

   
    //????????????????????????????????????????????????????
    int ngramStateID = 0;
    //??????????????????????????????????????
   
    SymbolTable p_symbol = new BuildinSymbol(null);
   
    //##setup feature templates list
    ArrayList<FeatureTemplate> featTemplates =  new ArrayList<FeatureTemplate>();
   
    boolean useIntegerString = false;
View Full Code Here

    }
   
    ParallelCorpusGrammarFactory parallelCorpus = this.getGrammarFactory();
   
    logger.info("Getting symbol table");
    SymbolTable sourceVocab = parallelCorpus.getSourceCorpus().getVocabulary();
   
    int lineNumber = 0;
    boolean oneTreePerSentence = ! this.keepTree;
   
    logger.info("Will read test sentences from " + testFileName);
    Scanner testFileScanner = new Scanner(new File(testFileName), encoding);
   
    logger.info("Read test sentences from " + testFileName);
    PrefixTree prefixTree = null;
    while (testFileScanner.hasNextLine() && (lineNumber-startingSentence+1)<maxTestSentences) {

      String line = testFileScanner.nextLine();
      lineNumber++;
      if (lineNumber < startingSentence) continue;
     
      int[] words = sourceVocab.getIDs(line);
     
      if (oneTreePerSentence || null==prefixTree)
      {
//        prefixTree = new PrefixTree(sourceSuffixArray, targetCorpusArray, alignments, sourceSuffixArray.getVocabulary(), lexProbs, ruleExtractor, maxPhraseSpan, maxPhraseLength, maxNonterminals, minNonterminalSpan);
        if (logger.isLoggable(Level.INFO)) logger.info("Constructing new prefix tree");
View Full Code Here

   *
   * @return a space-delimited string of the words in the
   *         phrase.
   */
  public String toString() {
    SymbolTable vocab = getVocab();
    StringBuffer buf = new StringBuffer();
        for (int i=0; i<size(); i++) {
      String word = vocab.getWord(getWordID(i));
      if (i != 0) buf.append(' ');
            buf.append(word);
        }
        return buf.toString();
  }
View Full Code Here

   
    logger.info("Constructing ARPA file");
    ArpaFile arpaFile = new ArpaFile(args[0]);
   
    logger.info("Getting symbol table");
    SymbolTable vocab = arpaFile.getVocab();
   
    logger.info("Constructing TrieLM");
    TrieLM lm = new TrieLM(arpaFile);
   
    int n = Integer.valueOf(args[2]);
    logger.info("N-gram order will be " + n);
   
    Scanner scanner = new Scanner(new File(args[1]));
   
    LinkedList<String> wordList = new LinkedList<String>();
    LinkedList<String> window = new LinkedList<String>();
   
    logger.info("Starting to scan " + args[1]);
    while (scanner.hasNext()) {
     
      logger.info("Getting next line...");
      String line = scanner.nextLine();
      logger.info("Line: " + line);
     
      String[] words = Regex.spaces.split(line);
      wordList.clear();
     
      wordList.add("<s>");
      for (String word : words) {
        wordList.add(word);
      }
      wordList.add("</s>");
     
      ArrayList<Integer> sentence = new ArrayList<Integer>();
//        int[] ids = new int[wordList.size()];
        for (int i=0, size=wordList.size(); i<size; i++) {
          sentence.add(vocab.getID(wordList.get(i)));
//          ids[i] = ;
        }
     
     
     
      while (! wordList.isEmpty()) {
        window.clear();

        {
          int i=0;
          for (String word : wordList) {
            if (i>=n) break;
            window.add(word);
            i++;
          }
          wordList.remove();
        }

        {
          int i=0;
          int[] wordIDs = new int[window.size()];
          for (String word : window) {
            wordIDs[i] = vocab.getID(word);
            i++;
          }

          logger.info("logProb " + window.toString() + " = " + lm.ngramLogProbability(wordIDs, n));
        }
View Full Code Here

 
// end readHyperGraph()
//===============================================================
  static public Map<String,Integer> obtainRuleStringToIDTable(String rulesFile) {
       
    SymbolTable symbolTable = new BuildinSymbol(null);
    GrammarReader<BilingualRule> ruleReader = new DiskHyperGraphFormatReader(rulesFile, symbolTable);
    Map<String,Integer> rulesIDTable = new HashMap<String,Integer>();
   
    ruleReader.initialize();
    for (Rule rule : ruleReader) {       
View Full Code Here

  static public int mergeDiskHyperGraphs(int ngramStateID, boolean saveModelCosts, int totalNumSent,
      boolean useUniqueNbest, boolean useTreeNbest,
      String filePrefix1, String filePrefix2, String filePrefixOut, boolean removeDuplicate) throws IOException{
   
    SymbolTable symbolTbl = new BuildinSymbol();
   
    DiskHyperGraph diskHG1 = new DiskHyperGraph(symbolTbl, ngramStateID, saveModelCosts, null);
    diskHG1.initRead(filePrefix1+".hg.items", filePrefix1+".hg.rules", null);
   
    DiskHyperGraph diskHG2 = new DiskHyperGraph(symbolTbl, ngramStateID, saveModelCosts, null);
View Full Code Here

TOP

Related Classes of joshua.corpus.vocab.SymbolTable

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.