Package joshua.corpus

Examples of joshua.corpus.Phrase


        return null != this.nextForeignPhrase;
      }
     
      public PhrasePair next() {
        if (this.hasNext()) {
          Phrase f = this.nextForeignPhrase;
         
          Phrase e = null;
          try {
            e = re.readPhrase();
          } catch (IOException ioe) {
            throw new RuntimeException("IOException", ioe);
          }
          if (null == e) {
            fileLengthMismatchException();
            return null; // Needed to make javac happy
          } else {
            if (e.size() != 0 && f.size() != 0) {
              if (null != ra) {
                String line = null;
                try {
                  line = ra.readLine();
                } catch (IOException ioe) {
                  throw new RuntimeException("IOException", ioe);
                }
               
                if (null == line) {
                  fileLengthMismatchException();
                  return null; // Needed to make javac happy
                } else {
                  Alignment a = new Alignment(
                    (short)f.size(), (short)e.size(), line);
                 
                  this.nextForeignPhrase = null;
                  return new PhrasePair(f, e, a);
                }
              } else {
View Full Code Here


    for (String fn : files) {
      System.err.println("Loading test set from " +fn+ "...");
     
      PhraseReader reader = new PhraseReader(
          new FileReader(fn), this.vf, (byte)1);
      Phrase phrase;
      int lineCount = 0;
      try {
        while ((phrase = reader.readPhrase()) != null) {
          lineCount++;
          List<Phrase> ngrams = phrase.getSubPhrases(this.maxN);
          for (Phrase ngram : ngrams) map.put(ngram, 0);
        }
      } finally {
        reader.close();
      }
View Full Code Here

        JoshuaConfiguration.use_right_equivalent_state);
   
    this.weights = new float[corpus.getNumSentences()];
   
    for (int i=0, n=corpus.getNumSentences(); i<n; i++) {
      Phrase sentence = corpus.getSentence(i);
      int[] words = sentence.getWordIDs();
      double largeProbLM = largeLM.ngramLogProbability(words);
      double testProbLM = testLM.ngramLogProbability(words);
      double ratio = testProbLM - largeProbLM;
      this.weights[i] = (float) ratio;
    }
View Full Code Here

          // ...then increment the phrase length and end of phrase marker.
          i++, endOfPhrase = currentPosition + i) {

       
        // Get the current phrase
        Phrase phrase = new ContiguousPhrase(currentPosition, endOfPhrase, corpus);

        if (logger.isLoggable(Level.FINEST)) logger.finest("Found phrase (" +currentPosition + ","+endOfPhrase+") "  + phrase);

        // If the phrase is one we care about...
        if (frequentPhrases.containsKey(phrase)) {
View Full Code Here

   */
  private void processPhraseWindow(PhrasePairCollocations collocations,
      LinkedList<Phrase> phrasesInWindow,
      LinkedList<Integer> positions) {
   
    Phrase phrase1 = phrasesInWindow.removeFirst();
    int position1 = positions.removeFirst();

    Iterator<Phrase> phraseIterator = phrasesInWindow.iterator();
    Iterator<Integer> positionIterator = positions.iterator();

    int end1 = position1 + phrase1.size();
   
    while (phraseIterator.hasNext() && positionIterator.hasNext()) {
   
      Phrase phrase2 = phraseIterator.next();
      int position2 = positionIterator.next();

      int end2 = position2 + phrase2.size();
     
      if (position2-end1 >= minNonterminalSpan  &&  end2-position1 <= maxPhraseSpan) {
        if (logger.isLoggable(Level.FINEST)) logger.finest("    Recording collocation: " + phrase1 + "\t" + phrase2 + "\t" + position1 + "\t" + position2);
        collocations.record(phrase1, phrase2, position1, position2);
      } else if (logger.isLoggable(Level.FINEST)) {
View Full Code Here

       
        for (int phraseLength : frequencyClass.validPhraseLengths(max)) {
         
          int endOfPhrase = startOfPhrase + phraseLength;
         
          Phrase phrase = new ContiguousPhrase(
              startOfPhrase,
              endOfPhrase,
              corpus);
         
          frequentPhrases.add(new Counted<Phrase>(phrase, frequency));
          if (frequentPhrases.size() > maxPhrases) {
            Counted<Phrase> pruned = frequentPhrases.poll();
            int prunedFrequency = pruned.getCount();
            prunedFrequencies.add(prunedFrequency);
            if (logger.isLoggable(Level.FINER)) logger.info("Pruned " + pruned.getElement() + " with frequency " + prunedFrequency);
            break;
          }
         
        }
      } else if (logger.isLoggable(Level.FINER)) {
        logger.finer("Skipping pruned frequency " + frequency);
      }
    }

    while (! frequentPhrases.isEmpty() && prunedFrequencies.contains(frequentPhrases.peek().getCount())) {
      Counted<Phrase> pruned = frequentPhrases.poll();
      if (logger.isLoggable(Level.FINER)) logger.finer("Pruned " + pruned.getElement() + " " + pruned.getCount());
    }
   
    Counted<Phrase>[] reverse = new Counted[frequentPhrases.size()];
    {
      int i=frequentPhrases.size()-1;
      while (! frequentPhrases.isEmpty()) {
        reverse[i] = frequentPhrases.poll();
        i -= 1;
      }
    }
   
    LinkedHashMap<Phrase,Integer> results = new LinkedHashMap<Phrase,Integer>();
    for (Counted<Phrase> countedPhrase : reverse) {
      Phrase phrase = countedPhrase.getElement();
      Integer count = countedPhrase.getCount();
      results.put(phrase, count);
    }
//   
//    while (! frequentPhrases.isEmpty()) {
View Full Code Here

          // ...then increment the phrase length and end of phrase marker.
          i++, endOfPhrase = currentPosition + i) {

        if (trackMe) logger.fine("endOfPhrase=="+endOfPhrase);
        // Get the current phrase
        Phrase phrase = new ContiguousPhrase(currentPosition, endOfPhrase, corpus);

        if (phrase.toString().equals(".")) {
          logger.fine("Huzzah, £20 for the King!");
          trackMe = true;
        }
       
        if (logger.isLoggable(Level.FINE)) logger.fine("In sentence " + sentenceNumber + " found phrase (" +currentPosition + ","+endOfPhrase+") "  + phrase);
View Full Code Here

    out.writeInt(frequentPhrases.size());
    if (loggingFiner) logger.finest("Wrote: frequentPhrases.size()="+frequentPhrases.size());
   
    // Write out frequentPhrases map
    for (Map.Entry<Phrase, Integer> entry : frequentPhrases.entrySet()) {
      Phrase phrase = entry.getKey();
      int phraseCount = entry.getValue();
      int[] wordIDs = phrase.getWordIDs();
     
      // Write out number of times the phrase is found in the corpus
      out.writeInt(phraseCount);
      if (loggingFiner) logger.finer("Wrote: phraseCount="+phraseCount);
     
View Full Code Here

    StringBuilder s = new StringBuilder();

    for (Map.Entry<Phrase, Integer> entry : frequentPhrases.entrySet()) {

      Phrase phrase = entry.getKey();
      Integer frequency = entry.getValue();

      if (format==null) {
        int length = frequency.toString().length();
        format = "%1$" + length + "d";
      }

      s.append(String.format(format, frequency));
      s.append('\t');
      s.append(phrase.toString());
      s.append('\n');

    }

    return s.toString();
View Full Code Here

    }
    List<Phrase> phrases = new ArrayList<Phrase>();
   
    for (int i = 0; i < size(); i++) {
      for (int j = i + 1; (j <= size()) && (j - i <= maxLength); j++) {
        Phrase subPhrase = subPhrase(i,j);
        phrases.add(subPhrase);
      }
    }
    return phrases;
  }
View Full Code Here

TOP

Related Classes of joshua.corpus.Phrase

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.