Package joshua.corpus.mm

Examples of joshua.corpus.mm.MemoryMappedCorpusArray


    //////////////////////////////////
    // Source language corpus array //
    //////////////////////////////////
    // binaryCorpus selects the implementation: a MemoryMappedCorpusArray
    // constructed over sourceFileName, or a corpus array built by
    // SuffixArrayFactory from the same file name plus the word/sentence counts.
    final Corpus sourceCorpusArray;
    if (binaryCorpus) {
      if (logger.isLoggable(Level.INFO)) logger.info("Constructing memory mapped source language corpus array.");
      sourceCorpusArray = new MemoryMappedCorpusArray(commonVocab, sourceFileName);
    } else {
      if (logger.isLoggable(Level.INFO)) logger.info("Constructing source language corpus array.");
      sourceCorpusArray = SuffixArrayFactory.createCorpusArray(sourceFileName, commonVocab, numSourceWords, numSourceSentences);
    }

    //////////////////////////////////
    // Source language suffix array //
    //////////////////////////////////
    // The same flag selects between a MemoryMappedSuffixArray constructed with
    // the suffixes file name and a suffix array built by SuffixArrayFactory from
    // the corpus array above; both variants receive cacheSize.
    Suffixes sourceSuffixArray;
    // Plain alias of sourceSuffixesFileName; no separate path is derived here.
    String binarySourceSuffixArrayFileName = sourceSuffixesFileName;
    if (binaryCorpus) {
      if (logger.isLoggable(Level.INFO)) logger.info("Constructing source language suffix array from binary file " + binarySourceSuffixArrayFileName);
      sourceSuffixArray = new MemoryMappedSuffixArray(binarySourceSuffixArrayFileName, sourceCorpusArray, cacheSize);
    } else {
      if (logger.isLoggable(Level.INFO)) logger.info("Constructing source language suffix array from source corpus.");
      sourceSuffixArray = SuffixArrayFactory.createSuffixArray(sourceCorpusArray, cacheSize);
    }
   
   

       
    //////////////////////////////////
    // Target language corpus array //
    //////////////////////////////////
    // Mirrors the source-side construction, using targetFileName and the
    // target word/sentence counts; the vocabulary (commonVocab) is shared.
    final Corpus targetCorpusArray;
    if (binaryCorpus) {
      if (logger.isLoggable(Level.INFO)) logger.info("Constructing memory mapped target language corpus array.");
      targetCorpusArray = new MemoryMappedCorpusArray(commonVocab, targetFileName);
    } else {
      if (logger.isLoggable(Level.INFO)) logger.info("Constructing target language corpus array.");
      targetCorpusArray = SuffixArrayFactory.createCorpusArray(targetFileName, commonVocab, numTargetWords, numTargetSentences);
    }
   
View Full Code Here


      // Populate commonVocab from the file <joshDir>/common.vocab.
      String binaryVocabFileName = joshDir + "/common.vocab";
      ObjectInput in = BinaryIn.vocabulary(binaryVocabFileName);
    commonVocab.readExternal(in);
   
    // Source and target corpora are both constructed with the same vocabulary.
    String sourceFileName = joshDir + "/source.corpus";
    Corpus sourceCorpusArray = new MemoryMappedCorpusArray(commonVocab, sourceFileName);

    String targetFileName = joshDir + "/target.corpus";
    Corpus targetCorpusArray = new MemoryMappedCorpusArray(commonVocab, targetFileName);
 
    // The alignment grids are constructed from their own file plus both corpora.
    String alignmentFileName = joshDir + "/alignment.grids";
    Alignments alignments = new MemoryMappedAlignmentGrids(alignmentFileName, sourceCorpusArray, targetCorpusArray);
 
    // Bundle the two corpora and their alignments into a single object.
    return new AlignedParallelCorpus(sourceCorpusArray, targetCorpusArray, alignments);
View Full Code Here

   
    // Source corpus: constructed over binarySourceCorpusFileName using this
    // object's symbol table.
    if (logger.isLoggable(Level.INFO))
      logger.info("Reading source language corpus from " +
        binarySourceCorpusFileName);
    Corpus sourceCorpusArray =
      new MemoryMappedCorpusArray(
        this.symbolTable, binarySourceCorpusFileName);
   
   
    // Source suffix array: constructed with the suffixes file name, the source
    // corpus created above, and maxCacheSize.
    if (logger.isLoggable(Level.INFO))
      logger.info("Reading source language suffix array from " +
        binarySourceSuffixesFileName);
    Suffixes sourceSuffixArray =
      new MemoryMappedSuffixArray(
          binarySourceSuffixesFileName,
          sourceCorpusArray,
          maxCacheSize);

   
    // Target corpus: same symbol table as the source side.
    if (logger.isLoggable(Level.INFO))
      logger.info("Reading target language corpus from " +
        binaryTargetCorpusFileName);
    Corpus targetCorpusArray =
      new MemoryMappedCorpusArray(
        this.symbolTable, binaryTargetCorpusFileName);
   
    // Only the log statement for the target suffix array is visible in this
    // excerpt; its construction lies beyond the excerpt.
    if (logger.isLoggable(Level.INFO))
      logger.info("Reading target language suffix array from " +
        binaryTargetSuffixesFileName);
View Full Code Here

          // Create an empty vocabulary and populate it from binaryVocabFileName.
          Vocabulary commonVocab = new Vocabulary();
          ObjectInput in = BinaryIn.vocabulary(binaryVocabFileName);
          commonVocab.readExternal(in);

          logger.fine("Loading source corpus...");
          Corpus sourceCorpus = new MemoryMappedCorpusArray(commonVocab, binarySourceFileName);

          // Two-argument constructor: no cache size is passed here.
          logger.fine("Loading source suffix array...");
          Suffixes sourceSuffixes = new MemoryMappedSuffixArray(binarySourceSuffixesFileName, sourceCorpus);
         
          logger.fine("Loading target corpus...");   
          Corpus targetCorpus = new MemoryMappedCorpusArray(commonVocab, binaryTargetFileName);
         
          // NOTE(review): this looks like a copy-paste error. The variable is
          // named targetSuffixes and the log message says "target", yet it is
          // constructed from binarySourceSuffixesFileName and sourceCorpus,
          // exactly like sourceSuffixes above. It presumably should use the
          // target suffixes file name and targetCorpus; the target file-name
          // variable is not visible in this excerpt, so confirm before fixing.
          logger.fine("Loading target suffix array...");
          Suffixes targetSuffixes = new MemoryMappedSuffixArray(binarySourceSuffixesFileName, sourceCorpus);

          logger.fine("Loading alignment grids...");
View Full Code Here

      // Replace symbolTable with a fresh Vocabulary populated from `in`
      // (the input object is created outside this excerpt).
      symbolTable = new Vocabulary();
      symbolTable.readExternal(in);

      // NOTE(review): two INFO messages are logged for the same step. The
      // second says "source language" although the identifiers here
      // (corpusArray, binaryCorpusFileName) are not source-specific; it may be
      // a leftover from copied code. Confirm whether it should be removed.
      logger.info("Constructing corpus array from file " + binaryCorpusFileName);
      if (logger.isLoggable(Level.INFO)) logger.info("Constructing memory mapped source language corpus array.");
      corpusArray = new MemoryMappedCorpusArray(symbolTable, binaryCorpusFileName);

      // The suffix array is constructed over the corpus array created above,
      // with Cache.DEFAULT_CAPACITY as its third argument.
      logger.info("Constructing suffix array from file " + binarySuffixArrayFileName);
      suffixArray = new MemoryMappedSuffixArray(binarySuffixArrayFileName, corpusArray, Cache.DEFAULT_CAPACITY);

View Full Code Here

      // Round-trip check: build a CorpusArray from `filename`, write it out,
      // construct a MemoryMappedCorpusArray from the written files, and assert
      // that both report the same sizes, word IDs, sentence boundaries and
      // sentences.
      Vocabulary.initializeVocabulary(filename, vocab, true);
      CorpusArray corpus = SuffixArrayFactory.createCorpusArray(filename, vocab, numWords, numSentences);
     
      // Arguments: a ".corpus" path, a ".vocab" path, and "UTF-8"
      // (presumably the character encoding; confirm against CorpusArray.write).
      corpus.write(filename+".corpus", filename+".vocab", "UTF-8");
     
      // This constructor takes the two file names written above (corpus file
      // first, then vocabulary file).
      MemoryMappedCorpusArray mmCorpus = new MemoryMappedCorpusArray(filename+".corpus", filename+".vocab");
     
      // Overall word count and sentence count must agree.
      Assert.assertEquals(mmCorpus.size(), corpus.size());
      Assert.assertEquals(mmCorpus.getNumSentences(), corpus.getNumSentences());
     
      // For each word in the corpus,
      for (int i=0; i<corpus.size(); i++) {
       
        // Verify that the memory-mapped corpus and the in-memory corpus have the same value
        Assert.assertEquals(mmCorpus.getWordID(i), corpus.getWordID(i));
      }
     
     
      // For each sentence in the corpus
      // (the loop bound is the length of the in-memory corpus's `sentences` array)
      for (int i=0; i<corpus.sentences.length; i++) {
       
        // Verify that the sentence start position in the memory-mapped corpus and the in-memory corpus have the same value
        Assert.assertEquals(mmCorpus.getSentencePosition(i), corpus.getSentencePosition(i));
       
        // Verify that the sentence end position in the memory-mapped corpus and the in-memory corpus have the same value
        Assert.assertEquals(mmCorpus.getSentenceEndPosition(i), corpus.getSentenceEndPosition(i));
       
        // Verify that the phrase corresponding to this sentence is the same
        Phrase sentence = corpus.getSentence(i);
        Phrase mmSentence = mmCorpus.getSentence(i);
        Assert.assertNotNull(sentence);
        Assert.assertNotNull(mmSentence);
        Assert.assertEquals(mmSentence, sentence);
      }
     
View Full Code Here

TOP

Related Classes of joshua.corpus.mm.MemoryMappedCorpusArray

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.