Package joshua.corpus.vocab

Examples of joshua.corpus.vocab.Vocabulary


  PrefixTree tree;
 
  @Test(dependsOnMethods = {"prefixTreeNodes","suffixLinks"})
  public void setup() {
   
    vocab = new Vocabulary();
    it = vocab.addTerminal("it");
    persuades = vocab.addTerminal("persuades");
    him = vocab.addTerminal("him");
    and = vocab.addTerminal("and");
    disheartens = vocab.addTerminal("disheartens");
View Full Code Here


    //String alignmentsType = alignmentsType;
 
    int maxCacheSize = 100000;//12566;
   
    int numSourceWords, numSourceSentences;
    Vocabulary sourceVocab = new Vocabulary();
    int[] sourceWordsSentences = Vocabulary.initializeVocabulary(sourceFileName, sourceVocab, true);
    numSourceWords = sourceWordsSentences[0];
    numSourceSentences = sourceWordsSentences[1];
   
    Corpus sourceCorpusArray = SuffixArrayFactory.createCorpusArray(sourceFileName, sourceVocab, numSourceWords, numSourceSentences);
    Suffixes sourceSuffixArray = SuffixArrayFactory.createSuffixArray(sourceCorpusArray, maxCacheSize);
   
    int numTargetWords, numTargetSentences;
    Vocabulary targetVocab = new Vocabulary();
    int[] targetWordsSentences = Vocabulary.initializeVocabulary(targetFileName, targetVocab, true);
    numTargetWords = targetWordsSentences[0];
    numTargetSentences = targetWordsSentences[1];
   
    Corpus targetCorpusArray = SuffixArrayFactory.createCorpusArray(targetFileName, targetVocab, numTargetWords, numTargetSentences);
View Full Code Here

      for (char c2='a'; c2<='z'; c2++) {
        words.add(new String(new char[]{c1,c2}));
     
    }
   
    Vocabulary vocab = new Vocabulary(words);
   
    try {
     
      File tempFile = File.createTempFile(BinaryTest.class.getName(), "vocab");
      FileOutputStream outputStream = new FileOutputStream(tempFile);
      ObjectOutput out = new BinaryOut(outputStream, true);
      vocab.writeExternal(out);
     
      ObjectInput in = new BinaryIn<Vocabulary>(tempFile.getAbsolutePath(), Vocabulary.class);
      Object o = in.readObject();
      Assert.assertTrue(o instanceof Vocabulary);
     
      Vocabulary newVocab = (Vocabulary) o;
     
      Assert.assertNotNull(newVocab);
      Assert.assertEquals(newVocab.size(), vocab.size());     
     
      Assert.assertEquals(newVocab, vocab);
     

     
View Full Code Here

//    refFile.close();


    // Source language vocabulary
    println("Creating src vocabulary @ " + (new Date()));
    srcVocab = new Vocabulary();
    int[] sourceWordsSentences = Vocabulary.initializeVocabulary(trainSrc_fileName, srcVocab, true);

    int numSourceWords = sourceWordsSentences[0];
    int numSourceSentences = sourceWordsSentences[1];

    // Source language corpus array
    println("Reading src corpus @ " + (new Date()));
    srcCorpusArray = SuffixArrayFactory.createCorpusArray(trainSrc_fileName, srcVocab, numSourceWords, numSourceSentences);

    // Source language suffix array
    println("Creating src SA @ " + (new Date()));
    srcSA = SuffixArrayFactory.createSuffixArray(srcCorpusArray, maxCacheSize);


    // Target language vocabulary
    println("Creating tgt vocabulary @ " + (new Date()));
    tgtVocab = new Vocabulary();
    int[] targetWordsSentences = Vocabulary.initializeVocabulary(trainTgt_fileName, tgtVocab, true);

    int numTargetWords = targetWordsSentences[0];
    int numTargetSentences = targetWordsSentences[1];
View Full Code Here

    Set<String> sourceWords = new HashSet<String>();
    for (String word : corpusString.split("\\s+")) {
      sourceWords.add(word);
    }

    sourceVocab = new Vocabulary(sourceWords);
   

    corpusSentence = new BasicPhrase(corpusString, sourceVocab);
   
    targetCorpusString = "das macht ihn und es beschädigt ihn , es setzt ihn auf und es führt ihn aus .";
    Set<String> targetWords = new HashSet<String>();
    for (String targetWord : targetCorpusString.split("\\s+")) {
      targetWords.add(targetWord);
    }
   
    targetVocab = new Vocabulary(targetWords);
   
    ntVocab = new HashMap<Integer,String>();
    ntVocab.put(-1, "X");
   
    {
View Full Code Here

   * Constructs an empty corpus.
   * <p>
   * NOTE: Primarily needed for Externalizable interface.
   */
  public CorpusArray() {
    super(new Vocabulary());
//    this.symbolTable = new Vocabulary();
    this.sentences = new int[]{};
    this.corpus = new int[]{};
  }
View Full Code Here

    String corpusFileName = args[0];
    String binaryVocabFilename = args[1];
    String binaryCorpusFilename = args[2];
    String charset = (args.length > 3) ? args[3] : "UTF-8";
   
    Vocabulary symbolTable = new Vocabulary();
    int[] lengths = Vocabulary.initializeVocabulary(corpusFileName, symbolTable, true);
   
    CorpusArray corpusArray = SuffixArrayFactory.createCorpusArray(corpusFileName, symbolTable, lengths[0], lengths[1]);
   
    corpusArray.write(binaryCorpusFilename, binaryVocabFilename, charset);
View Full Code Here

  private int[]      words;
 
 
  public BasicPhrase(byte language, String sentence) {
    this.language   = language;
    this.vocabulary = new Vocabulary();
    this.words = splitSentence(sentence, vocabulary);
  }
View Full Code Here

    int lastSentence = Integer.parseInt(argv[3]);
    HashMap<Integer,Integer> chosenSentences = new HashMap<Integer,Integer>();
    for (int i = firstSentence; i < lastSentence; i++) {
      chosenSentences.put(i, i);
    }
    Vocabulary vocab = new Vocabulary();
    DiskHyperGraph dhg = new DiskHyperGraph(vocab, 0, true, null);
    dhg.initRead(itemsFile, rulesFile, chosenSentences);
    JungHyperGraph hg = new JungHyperGraph(dhg.readHyperGraph(), vocab);
    JFrame frame = new JFrame("Joshua Hypergraph");
    frame.getContentPane().add(new HyperGraphViewer(hg, vocab));
View Full Code Here

    int lastSentence = Integer.parseInt(argv[3]);
    HashMap<Integer,Integer> chosenSentences = new HashMap<Integer,Integer>();
    for (int i = firstSentence; i < lastSentence; i++) {
      chosenSentences.put(i, i);
    }
    Vocabulary vocab = new Vocabulary();
    DiskHyperGraph dhg = new DiskHyperGraph(vocab, 0, true, null);
    dhg.initRead(itemsFile, rulesFile, chosenSentences);
    JungHyperGraph hg = new JungHyperGraph(dhg.readHyperGraph(), vocab);
    return;
  }
View Full Code Here

TOP

Related Classes of joshua.corpus.vocab.Vocabulary

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., now owned by Oracle Corporation. Contact coftware#gmail.com.