Package at.ofai.gate.virtualdocuments

Source Code of at.ofai.gate.virtualdocuments.ExportContainedAnnotationsPR

/*
*  AnnotateBySpecPR.java
*
*  This file is is free software, licenced under the
*  GNU Library General Public License, Version 2, June 1991.
*  See http://www.gnu.org/licenses/gpl-2.0.html
*
*  $Id: $
*/

package at.ofai.gate.virtualdocuments;

import gate.Annotation;
import gate.AnnotationSet;
import gate.Controller;
import gate.LanguageAnalyser;
import gate.Resource;
import gate.Utils;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ControllerAwarePR;
import gate.creole.ExecutionException;
import gate.creole.ResourceInstantiationException;
import gate.creole.metadata.CreoleParameter;
import gate.creole.metadata.CreoleResource;
import gate.creole.metadata.Optional;
import gate.creole.metadata.RunTime;
import gate.util.GateRuntimeException;
import gate.util.OffsetComparator;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.log4j.Logger;

/**
* This PR takes the name of a containing annotation type,
* a contained annotation type name
* and a feature name.
* It will find for each containing annotation type, the contained annotations
* and output a single line with all the values of what the specified feature
* contains for each of these annotations.
* The PR also takes a suffix (which could be an extension) and a directory URL
* and will create a new file for each document processed, storing the file
* in the directory and forming the file name by appending the suffix to the
* document name
* @author Johann Petrak
*/
@CreoleResource(name = "Export Contained Annotations PR",
        comment = "Export lines of text based on annotations contained in other annotations")
public class ExportContainedAnnotationsPR
  extends AbstractLanguageAnalyser
  implements LanguageAnalyser, ControllerAwarePR
{
  // parms input annotation set, containing annotation type name,
  // contained annotation type name, feature name
  // If feature name = blank, take string
  // If containing annotation type name is blank, use whole document
  // Contained annotation type name must not be blank
  // Directory URL, must not be null
  // suffix, if blank do not use any suffix

  private static final long serialVersionUID = 1L;

  @RunTime
  @Optional
  @CreoleParameter(
    comment = "Name of the input annotation set",
    defaultValue = "")
  public void setInputAnnotationSetName(String annotationSetName) {
    this.inputAnnotationSetName = annotationSetName;
  }
  public String getInputAnnotationSetName() {
    return inputAnnotationSetName;
  }
  protected String inputAnnotationSetName;

  @RunTime
  @Optional
  @CreoleParameter(
    comment = "Name of the containing annotations",
    defaultValue = "Sentence")
  public void setContainingAnnotationTypeName(String annotationType) {
    this.containingAnnotationTypeName = annotationType;
  }
  public String getContainingAnnotationTypeName() {
    return containingAnnotationTypeName;
  }
  String containingAnnotationTypeName;

  @RunTime
  @Optional
  @CreoleParameter(
    comment = "Name of the contained annotations",
    defaultValue = "Token"
    )
  public void setContainedAnnotationTypeName(String annotationType) {
    this.containedAnnotationTypeName = annotationType;
  }
  public String getContainedAnnotationTypeName() {
    return containedAnnotationTypeName;
  }
  String containedAnnotationTypeName;

  @RunTime
  @Optional
  @CreoleParameter(
    comment = "Name of the feature to use for the contained annotation (default: underlying text)",
    defaultValue = ""
    )
  public void setFeatureName(String name) {
    this.featureName = name;
  }
  public String getFeatureName() {
    return featureName;
  }
  String featureName;
 
  @RunTime
  @Optional
  @CreoleParameter(
    comment = "Suffix to use to form file names from document names",
    defaultValue = ".txt"
    )
  public void setFileNameSuffix(String name) {
    this.fileNameSuffix = name;
  }
  public String getFileNameSuffix() {
    return fileNameSuffix;
  }
  String fileNameSuffix;

  @RunTime
  @Optional
  @CreoleParameter(
    comment = "URL of the directory where to write the document files to",
    defaultValue = "."
    )
  public void setDirectoryUrl(URL directoryUrl) {
    this.directoryUrl = directoryUrl;
  }
  public URL getDirectoryUrl() {
    return directoryUrl;
  }
  URL directoryUrl;

  File directoryFile;

  protected Logger logger;
 
  @Override
  public Resource init() throws ResourceInstantiationException {
    super.init();
    logger = Logger.getLogger(this.getClass());

    if(getContainedAnnotationTypeName() == null || getContainedAnnotationTypeName().equals("")) {
      throw new ResourceInstantiationException("Contained Annotation Type Name must be specified");
    }
    try {
      directoryFile = new File(directoryUrl.toURI());
    } catch (URISyntaxException ex) {
      throw new ResourceInstantiationException(ex);
    }
    return this;
  }

  @Override
  public void execute() throws ExecutionException {
    fireStatusChanged("ExportContainedAnnotationsPR processing: "
            + getDocument().getName());

    //check the input
    if(document == null) {
      throw new ExecutionException(
        "No document to process!"
      );
    }

    String fileName;
    fileName = getDocument().getName() + getFileNameSuffix();

    // 1) File to write the export stuff to
    File outFile = new File(directoryFile,fileName);
    PrintStream outStream;
    try {
      outStream = new PrintStream(outFile);
    } catch (FileNotFoundException ex) {
      throw new GateRuntimeException("Cannot write to file "+outFile.getAbsoluteFile(),ex);
    }

    // if a contained annotation name is specified, get those anns,
    // sort by offset and then loop through them, otherwise just get
    // all annotations
    AnnotationSet inputAS = document.getAnnotations(inputAnnotationSetName);


    if(containingAnnotationTypeName != null && !containingAnnotationTypeName.equals(""))  {
      AnnotationSet containingAnnotations = inputAS.get(containingAnnotationTypeName);
      //Iterator<Annotation> it = inputAnnotations.iterator();
      //String content = document.getContent().toString();

      List<Annotation> containingList = new ArrayList<Annotation>(containingAnnotations);
      Collections.sort(containingList,new OffsetComparator());
      for(Annotation containing : containingList) {
        // export the contained for the span covered by containing
        exportContainedAnnotations(outStream,containing.getStartNode().getOffset(),
                containing.getEndNode().getOffset(),inputAS);
      }
    } else {
      // export the contained for the whole document span
      exportContainedAnnotations(outStream,0L,Utils.lengthLong(document),
              inputAS);
    }
    outStream.close();
    fireStatusChanged("ExportContainedAnnotationsPR completed");

  }

  protected boolean exportContainedAnnotations(PrintStream outStream, Long from, Long to,
          AnnotationSet inputAS) {
    AnnotationSet filteredAS = inputAS.get(containedAnnotationTypeName,from,to);
    List<Annotation> filteredList = new ArrayList<Annotation>(filteredAS);
    Collections.sort(filteredList,new OffsetComparator());
    boolean haveSomething = false;
    for(Annotation ann : filteredList) {
      String toExport;
      if(featureName == null || featureName.equals("")) {
        toExport = Utils.stringFor(document, ann);
      } else {
        toExport = ann.getFeatures().get(featureName).toString();
      }
      outStream.print(toExport+" ");
      haveSomething = true;
    }
    if(haveSomething) { outStream.println(); }
    return haveSomething;
  }

  @Override
  public void controllerExecutionAborted(Controller arg0, Throwable arg1)
      throws ExecutionException {
  }

  @Override
  public void controllerExecutionFinished(Controller arg0)
      throws ExecutionException {
  }

  @Override
  public void controllerExecutionStarted(Controller arg0)
      throws ExecutionException {
    startup();
  }

  public void startup() throws ExecutionException {
    if(getContainedAnnotationTypeName() == null ||
       getContainedAnnotationTypeName().equals("")) {
    throw new ExecutionException("Contained Annotation Type Name must be specified");
  }
    // TODO: more parameter checking!
  try {
    directoryFile = new File(directoryUrl.toURI());
  } catch (URISyntaxException ex) {
    throw new ExecutionException(ex);
 
  }
 
}
TOP

Related Classes of at.ofai.gate.virtualdocuments.ExportContainedAnnotationsPR

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.