Package cc.mallet.fst.confidence

Source Code of cc.mallet.fst.confidence.TransducerConfidenceEstimator

/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
   http://www.cs.umass.edu/~mccallum/mallet
   This software is provided under the terms of the Common Public License,
   version 1.0, as published by http://www.opensource.org.  For further
   information, see the file `LICENSE' included with this distribution. */

/**
    @author Aron Culotta <a href="mailto:culotta@cs.umass.edu">culotta@cs.umass.edu</a>
*/

package cc.mallet.fst.confidence;


import java.util.logging.*;
import java.util.*;
import java.io.Serializable;

import cc.mallet.extract.LabeledSpan;
import cc.mallet.fst.*;
import cc.mallet.pipe.Noop;
import cc.mallet.pipe.iterator.*;
import cc.mallet.types.*;
import cc.mallet.util.MalletLogger;

/**
* Abstract class that estimates the confidence of a {@link Segment}
* extracted by a {@link Transducer}.
*/
abstract public class TransducerConfidenceEstimator implements Serializable
{
  private static Logger logger = MalletLogger.getLogger(TransducerConfidenceEstimator.class.getName());

  protected Transducer model; // the trained Transducer which
                              // performed the extractions.

  java.util.Vector segmentConfidences;

  public TransducerConfidenceEstimator (Transducer model) {
    this.model = model;
  }
 
  /**
     Calculates the confidence in the tagging of a {@link Segment}.
   */
  public double estimateConfidenceFor (Segment segment) {
    return estimateConfidenceFor (segment, null);
  }

  abstract public double estimateConfidenceFor (Segment segment, SumLatticeDefault lattice);

  public java.util.Vector getSegmentConfidences () {return this.segmentConfidences;}

  /**
     Ranks all {@link Segment}s in this {@link InstanceList} by
     confidence estimate.
     @param ilist list of segmentation instances
     @param startTags represent the labels for the start states (B-)
     of all segments
     @param continueTags represent the labels for the continue state
     (I-) of all segments
     @return array of {@link Segment}s ordered by non-decreasing
     confidence scores, as calculated by <code>estimateConfidenceFor</code>
   */
  public Segment[] rankSegmentsByConfidence (InstanceList ilist, Object[] startTags,
                                             Object[] continueTags) {
    ArrayList segmentList = new ArrayList ();
    SegmentIterator iter = new SegmentIterator (this.model, ilist, startTags, continueTags);     
    if (this.segmentConfidences == null)
      segmentConfidences = new java.util.Vector ();
    while (iter.hasNext ()) {
      Segment segment = (Segment) iter.nextSegment ();
      double confidence = estimateConfidenceFor (segment);
      segment.setConfidence (confidence);
      logger.fine ("confidence=" + segment.getConfidence() + " for segment\n"
                   + segment.sequenceToString() + "\n");
      segmentList.add (segment);
    }
    Collections.sort (segmentList);
    Segment[] ret = new Segment[1];
    ret = (Segment[]) segmentList.toArray (ret);
    return ret;
  }

  /**
     ranks the segments in one {@link Instance}
     @param instance instances to be segmented
     @param startTags represent the labels for the start states (e.g. B-)
     of all segments
     @param continueTags represent the labels for the continue state
     (e.g. I-) of all segments
     @return array of {@link Segment}s ordered by non-decreasing
     confidence scores, as calculated by <code>estimateConfidenceFor</code>
   */
  public Segment[] rankSegmentsByConfidence (Instance instance, Object[] startTags,
                                             Object[] continueTags) {
    InstanceList ilist = new InstanceList (new Noop(instance.getDataAlphabet(),instance.getTargetAlphabet()));
    ilist.add (instance);
    return rankSegmentsByConfidence (ilist, startTags, continueTags);
  }

        public Transducer getTransducer() { return this.model; }
}
TOP

Related Classes of cc.mallet.fst.confidence.TransducerConfidenceEstimator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.