Examples of Vocabulary


Examples of joshua.corpus.vocab.Vocabulary

   * @param base a double. The base of the logarithm for quantization.
   */
  private BloomFilterLanguageModel(String filename, int order, int size, double base) {
    super(null, order);
    quantizationBase = base;
    vocabulary = new Vocabulary();
    populateBloomFilter(size, filename);
  }
View Full Code Here

Examples of joshua.corpus.vocab.Vocabulary

   *
   * @param in an ObjectInput stream to read from
   */
  public void readExternal(ObjectInput in)
  throws IOException, ClassNotFoundException {
    vocabulary = new Vocabulary();
    int vocabSize = in.readInt();
    for (int i = 0; i < vocabSize; i++) {
      String line = in.readUTF();
      vocabulary.addTerminal(line);
    }
View Full Code Here

Examples of joshua.corpus.vocab.Vocabulary

       
    }

  private static ParallelCorpus getParallelCorpus(String joshDir, int cacheSize) throws IOException, ClassNotFoundException {
   
    Vocabulary commonVocab = new Vocabulary();
      String binaryVocabFileName = joshDir + "/common.vocab";
      ObjectInput in = BinaryIn.vocabulary(binaryVocabFileName);
    commonVocab.readExternal(in);
   
    String sourceFileName = joshDir + "/source.corpus";
    Corpus sourceCorpusArray = new MemoryMappedCorpusArray(commonVocab, sourceFileName);

    String targetFileName = joshDir + "/target.corpus";
View Full Code Here

Examples of joshua.corpus.vocab.Vocabulary

  SymbolTable vocab;
 
  @Test
  public void setup() {
   
    vocab = new Vocabulary();
    vocab.addTerminal("a");
    vocab.addTerminal("because");
    vocab.addTerminal("boycott");
    vocab.addTerminal("of");
    vocab.addTerminal("parliament");
View Full Code Here

Examples of joshua.corpus.vocab.Vocabulary

      //      accidentally used anywhere.
     
      if (logger.isLoggable(Level.INFO))
        logger.info("Reading common vocabulary from " +
            binaryVocabFileName);
      Vocabulary commonVocab = new Vocabulary();
      commonVocab.readExternal(
          BinaryIn.vocabulary(binaryVocabFileName));

      // Initialize symbol table using suffix array's vocab
      this.initializeSymbolTable(commonVocab);
    }
View Full Code Here

Examples of joshua.corpus.vocab.Vocabulary

  PrefixTree tree;
 
  @Test(dependsOnMethods = {"prefixTreeNodes","suffixLinks"})
  public void setup() {
   
    vocab = new Vocabulary();
    it = vocab.addTerminal("it");
    persuades = vocab.addTerminal("persuades");
    him = vocab.addTerminal("him");
    and = vocab.addTerminal("and");
    disheartens = vocab.addTerminal("disheartens");
View Full Code Here

Examples of joshua.corpus.vocab.Vocabulary

    //String alignmentsType = alignmentsType;
 
    int maxCacheSize = 100000;//12566;
   
    int numSourceWords, numSourceSentences;
    Vocabulary sourceVocab = new Vocabulary();
    int[] sourceWordsSentences = Vocabulary.initializeVocabulary(sourceFileName, sourceVocab, true);
    numSourceWords = sourceWordsSentences[0];
    numSourceSentences = sourceWordsSentences[1];
   
    Corpus sourceCorpusArray = SuffixArrayFactory.createCorpusArray(sourceFileName, sourceVocab, numSourceWords, numSourceSentences);
    Suffixes sourceSuffixArray = SuffixArrayFactory.createSuffixArray(sourceCorpusArray, maxCacheSize);
   
    int numTargetWords, numTargetSentences;
    Vocabulary targetVocab = new Vocabulary();
    int[] targetWordsSentences = Vocabulary.initializeVocabulary(targetFileName, targetVocab, true);
    numTargetWords = targetWordsSentences[0];
    numTargetSentences = targetWordsSentences[1];
   
    Corpus targetCorpusArray = SuffixArrayFactory.createCorpusArray(targetFileName, targetVocab, numTargetWords, numTargetSentences);
View Full Code Here

Examples of joshua.corpus.vocab.Vocabulary

      for (char c2='a'; c2<='z'; c2++) {
        words.add(new String(new char[]{c1,c2}));
     
    }
   
    Vocabulary vocab = new Vocabulary(words);
   
    try {
     
      File tempFile = File.createTempFile(BinaryTest.class.getName(), "vocab");
      FileOutputStream outputStream = new FileOutputStream(tempFile);
      ObjectOutput out = new BinaryOut(outputStream, true);
      vocab.writeExternal(out);
     
      ObjectInput in = new BinaryIn<Vocabulary>(tempFile.getAbsolutePath(), Vocabulary.class);
      Object o = in.readObject();
      Assert.assertTrue(o instanceof Vocabulary);
     
      Vocabulary newVocab = (Vocabulary) o;
     
      Assert.assertNotNull(newVocab);
      Assert.assertEquals(newVocab.size(), vocab.size());     
     
      Assert.assertEquals(newVocab, vocab);
     

     
View Full Code Here

Examples of joshua.corpus.vocab.Vocabulary

//    refFile.close();


    // Source language vocabulary
    println("Creating src vocabulary @ " + (new Date()));
    srcVocab = new Vocabulary();
    int[] sourceWordsSentences = Vocabulary.initializeVocabulary(trainSrc_fileName, srcVocab, true);

    int numSourceWords = sourceWordsSentences[0];
    int numSourceSentences = sourceWordsSentences[1];

    // Source language corpus array
    println("Reading src corpus @ " + (new Date()));
    srcCorpusArray = SuffixArrayFactory.createCorpusArray(trainSrc_fileName, srcVocab, numSourceWords, numSourceSentences);

    // Source language suffix array
    println("Creating src SA @ " + (new Date()));
    srcSA = SuffixArrayFactory.createSuffixArray(srcCorpusArray, maxCacheSize);


    // Target language vocabulary
    println("Creating tgt vocabulary @ " + (new Date()));
    tgtVocab = new Vocabulary();
    int[] targetWordsSentences = Vocabulary.initializeVocabulary(trainTgt_fileName, tgtVocab, true);

    int numTargetWords = targetWordsSentences[0];
    int numTargetSentences = targetWordsSentences[1];
View Full Code Here

Examples of joshua.corpus.vocab.Vocabulary

    Set<String> sourceWords = new HashSet<String>();
    for (String word : corpusString.split("\\s+")) {
      sourceWords.add(word);
    }

    sourceVocab = new Vocabulary(sourceWords);
   

    corpusSentence = new BasicPhrase(corpusString, sourceVocab);
   
    targetCorpusString = "das macht ihn und es beschädigt ihn , es setzt ihn auf und es führt ihn aus .";
    Set<String> targetWords = new HashSet<String>();
    for (String targetWord : targetCorpusString.split("\\s+")) {
      targetWords.add(targetWord);
    }
   
    targetVocab = new Vocabulary(targetWords);
   
    ntVocab = new HashMap<Integer,String>();
    ntVocab.put(-1, "X");
   
    {
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.