Package cc.mallet.fst

Source Code of cc.mallet.fst.Segment

/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
   http://www.cs.umass.edu/~mccallum/mallet
   This software is provided under the terms of the Common Public License,
   version 1.0, as published by http://www.opensource.org.  For further
   information, see the file `LICENSE' included with this distribution. */

/**
    @author Aron Culotta <a href="mailto:culotta@cs.umass.edu">culotta@cs.umass.edu</a>
*/

package cc.mallet.fst;


import java.util.ArrayList;

import cc.mallet.types.ArraySequence;
import cc.mallet.types.Sequence;

/**
* Represents a labelled chunk of a {@link Sequence} segmented by a
* {@link Transducer}, usually corresponding to some object extracted
* from an input {@link Sequence}.
*/
public class Segment implements Comparable
{
  Sequence input, pred, truth; // input, predicted, and true sequences
  int start, end;              // offsets for this segment in the sequence
  Object startTag, inTag; // label for the beginning and inside of this Segment
   double confidence;           // confidence score for this extracted segment
  boolean correct;
  // this is a tough case b/c technically everything inside the
  // segment is tagged correctly
  boolean endsPrematurely; // e.g. truth: B I I O O
                           //      pred:  B I O O O
 
  /**
   * Initializes the segment.
   *
   * @param input entire input sequence
   * @param pred predicted sequence
   * @param start starting position of extracted segment
   * @param end ending position of extracted segment
   */
  public Segment (Sequence input, Sequence pred, Sequence truth, int start, int end,
                  Object startTag, Object inTag )
  {
    this.input = input;
    this.pred = pred;
    this.truth = truth;
     this.start = start;
    this.startTag = startTag;
    this.inTag = inTag;
    this.end = end;
    this.confidence = -1;
    this.correct = true;
    this.endsPrematurely = false;
    for (int i=start; i <= end; i++) {
      if (!pred.get(i).equals (truth.get(i))) {
        this.correct = false;
        break;
      }     
    }
    // segment can also be incorrect if it ends prematurely
    if (truth != null) {
      if (correct && end+1 < truth.size() && truth.get (end+1).equals (inTag)) {
        this.correct = false;
        this.endsPrematurely = true;
      }       
    }     
  }
 
  public void setCorrect (boolean b) { this.correct = b; }
  public int size() { return this.end - this.start + 1; }
  public Object getTruth (int i) { return this.truth.get( i ); }
  public Sequence getTruth () { return this.truth; }
  public Object getPredicted (int i) { return this.pred.get( i ); }
  public Sequence getPredicted () { return this.pred; }
  public void setPredicted (Sequence predicted) { this.pred = predicted; }
  public Sequence getInput () { return this.input; }
  public int getStart () { return this.start; }
  public int getEnd () { return this.end; }
  public Object getStartTag () { return this.startTag; }
  public Object getInTag () { return this.inTag; }
  public double getConfidence () {return this.confidence; }
  public void setConfidence (double c) {this.confidence = c; }
  public boolean correct () { return this.correct; }
  public boolean endsPrematurely () { return this.endsPrematurely; }
  public boolean indexInSegment (int index) {
    return (index >= this.start && index <= this.end);
  }

  public Sequence getSegmentInputSequence () {
    ArrayList ret = new ArrayList ();
    for (int i=start; i <= end; i++)
      ret.add( input.get( i ) );
    return new ArraySequence( ret );
  }
 
  public int compareTo (Object o) {
    Segment s = (Segment) o;
    if (s.confidence == -1 || this.confidence == -1) {
      throw new IllegalArgumentException ("attempting to compare confidences that have not been set yet..");
    }
    if (this.confidence > s.confidence)
      return 1;
    else if (this.confidence < s.confidence)
      return -1;
    else return 0;
  }

  public String sequenceToString () {
    String ret = "";
    for (int i=0; i < input.size(); i++) {
      if (i <= end && i >= start) // part of segment
        ret += pred.get(i).toString() + "[" + truth.get (i) + "][" + confidence + "]\t";
      else
        ret += "-[" + truth.get (i) + "]\t";
     }
    return ret;
  }

  public String toString () {
    String ret = "";
    ret += "start: " + start + " end: " + end + " confidence: " + confidence + "\n";
    for (int i=start; i <= end; i++) {
        ret += pred.get (i).toString() + "[" + truth.get (i) + "]\t";
     }
    return ret;
  }

  public boolean equals (Object o) {
    Segment s = (Segment) o;
    if (start == s.getStart() &&
        end == s.getEnd() &&
        correct == s.correct() &&
        input.size() == s.getInput().size()) {
      for (int i=start; i <= end; i++) {
        if (!pred.get( i ).equals( s.getPredicted( i ) ) ||
            !truth.get( i ).equals( s.getTruth( i ) )  )
          return false;
      }
      return true;
    }
    return false;
  }
}
TOP

Related Classes of cc.mallet.fst.Segment

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.