Package weka.core.xml

Source Code of weka.core.xml.XMLInstances

/*
*    This program is free software; you can redistribute it and/or modify
*    it under the terms of the GNU General Public License as published by
*    the Free Software Foundation; either version 2 of the License, or
*    (at your option) any later version.
*
*    This program is distributed in the hope that it will be useful,
*    but WITHOUT ANY WARRANTY; without even the implied warranty of
*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*    GNU General Public License for more details.
*
*    You should have received a copy of the GNU General Public License
*    along with this program; if not, write to the Free Software
*    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

/*
* XMLInstances.java
* Copyright (C) 2006 University of Waikato, Hamilton, New Zealand
*/

package weka.core.xml;

import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.ProtectedProperties;
import weka.core.RevisionUtils;
import weka.core.SparseInstance;
import weka.core.Utils;
import weka.core.Version;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Serializable;
import java.util.Enumeration;
import java.util.Properties;
import java.util.Vector;
import java.util.zip.GZIPInputStream;

import org.w3c.dom.Element;

/**
* XML representation of the Instances class.
*
* @author  fracpete (fracpete at waikato dot ac dot nz)
* @version $Revision: 1.4 $
*/
public class XMLInstances
  extends XMLDocument
  implements Serializable {

  /** for serialization */
  private static final long serialVersionUID = 3626821327547416099L;
 
  /** The filename extension that should be used for xrff files */
  public static String FILE_EXTENSION = ".xrff";
 
  // tags
  /** the root element */
  public final static String TAG_DATASET = "dataset";

  /** the header element */
  public final static String TAG_HEADER = "header";

  /** the body element */
  public final static String TAG_BODY = "body";

  /** the notes element */
  public final static String TAG_NOTES = "notes";

  /** the attributes element */
  public final static String TAG_ATTRIBUTES = "attributes";

  /** the attribute element */
  public final static String TAG_ATTRIBUTE = "attribute";

  /** the labels element */
  public final static String TAG_LABELS = "labels";

  /** the label element */
  public final static String TAG_LABEL = "label";

  /** the meta-data element */
  public final static String TAG_METADATA = "metadata";

  /** the property element */
  public final static String TAG_PROPERTY = "property";

  /** the data element */
  public final static String TAG_INSTANCES = "instances";

  /** the instance element */
  public final static String TAG_INSTANCE = "instance";

  /** the value element */
  public final static String TAG_VALUE = "value";
 
  // attributes
  /** the version attribute */
  public final static String ATT_VERSION = "version";
 
  /** the type attribute */
  public final static String ATT_TYPE = "type";
 
  /** the format attribute (for date attributes) */
  public final static String ATT_FORMAT = "format";
 
  /** the class attribute */
  public final static String ATT_CLASS = "class";
 
  /** the index attribute */
  public final static String ATT_INDEX = "index";
 
  /** the weight attribute */
  public final static String ATT_WEIGHT = "weight";
 
  /** the missing attribute */
  public final static String ATT_MISSING = "missing";
 
  // values
  /** the value for numeric */
  public final static String VAL_NUMERIC = "numeric";
 
  /** the value for date */
  public final static String VAL_DATE = "date";
 
  /** the value for nominal */
  public final static String VAL_NOMINAL = "nominal";
 
  /** the value for string */
  public final static String VAL_STRING = "string";
 
  /** the value for relational */
  public final static String VAL_RELATIONAL = "relational";
 
  /** the value for normal */
  public final static String VAL_NORMAL = "normal";
 
  /** the value for sparse */
  public final static String VAL_SPARSE = "sparse";
 
  /** the DTD */
  public final static String DOCTYPE =
      "<!" + DTD_DOCTYPE + " " + TAG_DATASET + "\n"
    + "[\n"
    + "   <!" + DTD_ELEMENT + " " + TAG_DATASET + " (" + TAG_HEADER + "," + TAG_BODY + ")" + ">\n"
    + "   <!" + DTD_ATTLIST + " " + TAG_DATASET + " " + ATT_NAME + " " + DTD_CDATA + " " + DTD_REQUIRED + ">\n"
    + "   <!" + DTD_ATTLIST + " " + TAG_DATASET + " " + ATT_VERSION + " " + DTD_CDATA + " \"" + Version.VERSION + "\">\n"
    + "\n"
    + "   <!" + DTD_ELEMENT + " " + TAG_HEADER + " (" + TAG_NOTES + DTD_OPTIONAL + "," + TAG_ATTRIBUTES + ")" + ">\n"
    + "   <!" + DTD_ELEMENT + " " + TAG_BODY   + " (" + TAG_INSTANCES  + ")" + ">\n"
    + "   <!" + DTD_ELEMENT + " " + TAG_NOTES + " " + DTD_ANY + ">   <!--  comments, information, copyright, etc. -->\n"
    + "\n"
    + "   <!" + DTD_ELEMENT + " " + TAG_ATTRIBUTES + " (" + TAG_ATTRIBUTE + DTD_AT_LEAST_ONE + ")" + ">\n"
    + "   <!" + DTD_ELEMENT + " " + TAG_ATTRIBUTE + " (" + TAG_LABELS + DTD_OPTIONAL + "," + TAG_METADATA + DTD_OPTIONAL + "," + TAG_ATTRIBUTES + DTD_OPTIONAL + ")" + ">\n"
    + "   <!" + DTD_ATTLIST + " " + TAG_ATTRIBUTE + " " + ATT_NAME + " " + DTD_CDATA + " " + DTD_REQUIRED + ">\n"
    + "   <!" + DTD_ATTLIST + " " + TAG_ATTRIBUTE + " " + ATT_TYPE + " (" + VAL_NUMERIC + DTD_SEPARATOR + VAL_DATE + DTD_SEPARATOR + VAL_NOMINAL + DTD_SEPARATOR + VAL_STRING + DTD_SEPARATOR + VAL_RELATIONAL + ") " + DTD_REQUIRED + ">\n"
    + "   <!" + DTD_ATTLIST + " " + TAG_ATTRIBUTE + " " + ATT_FORMAT + " " + DTD_CDATA + " " + DTD_IMPLIED + ">\n"
    + "   <!" + DTD_ATTLIST + " " + TAG_ATTRIBUTE + " " + ATT_CLASS + " (" + VAL_YES + DTD_SEPARATOR + VAL_NO + ") \"" + VAL_NO + "\"" + ">\n"
    + "   <!" + DTD_ELEMENT + " " + TAG_LABELS + " (" + TAG_LABEL + DTD_ZERO_OR_MORE + ")" + ">   <!-- only for type \"nominal\" -->\n"
    + "   <!" + DTD_ELEMENT + " " + TAG_LABEL + " " + DTD_ANY + ">\n"
    + "   <!" + DTD_ELEMENT + " " + TAG_METADATA + " (" + TAG_PROPERTY + DTD_ZERO_OR_MORE + ")" + ">\n"
    + "   <!" + DTD_ELEMENT + " " + TAG_PROPERTY + " " + DTD_ANY + ">\n"
    + "   <!" + DTD_ATTLIST + " " + TAG_PROPERTY + " " + ATT_NAME + " " + DTD_CDATA + " " + DTD_REQUIRED + ">\n"
    + "\n"
    + "   <!" + DTD_ELEMENT + " " + TAG_INSTANCES + " (" + TAG_INSTANCE + DTD_ZERO_OR_MORE + ")" + ">\n"
    + "   <!" + DTD_ELEMENT + " " + TAG_INSTANCE + " (" + TAG_VALUE + DTD_ZERO_OR_MORE + ")" + ">\n"
    + "   <!" + DTD_ATTLIST + " " + TAG_INSTANCE + " " + ATT_TYPE + " (" + VAL_NORMAL + DTD_SEPARATOR + VAL_SPARSE + ") \"" + VAL_NORMAL + "\"" + ">\n"
    + "   <!" + DTD_ATTLIST + " " + TAG_INSTANCE + " " + ATT_WEIGHT + " " + DTD_CDATA + " " + DTD_IMPLIED + ">\n"
    + "   <!" + DTD_ELEMENT + " " + TAG_VALUE + " (" + DTD_PCDATA + DTD_SEPARATOR + TAG_INSTANCES + ")" + DTD_ZERO_OR_MORE + ">\n"
    + "   <!" + DTD_ATTLIST + " " + TAG_VALUE + " " + ATT_INDEX + " " + DTD_CDATA + " " + DTD_IMPLIED + ">   <!-- 1-based index (only used for instance format \"sparse\") -->\n"
    + "   <!" + DTD_ATTLIST + " " + TAG_VALUE + " " + ATT_MISSING + " (" + VAL_YES + DTD_SEPARATOR + VAL_NO + ") \"" + VAL_NO + "\"" + ">\n"
    + "]\n"
    + ">";

  /** the precision for numbers */
  protected int m_Precision = 6;
 
  /** the underlying Instances */
  protected Instances m_Instances;
 
  /**
   * the default constructor
   *
   * @throws Exception  if XML initialization fails
   */
  public XMLInstances() throws Exception {
    super();
   
    m_Instances = null;

    setDocType(DOCTYPE);
    setRootNode(TAG_DATASET);
    setValidating(true);
  }
 
  /**
   * generates the XML structure based on the given data
   *
   * @param data  the data to build the XML structure from
   * @throws Exception  if initialization/generation fails
   */
  public XMLInstances(Instances data) throws Exception {
    this();
   
    setInstances(data);
  }
 
  /**
   * generates the Instances directly from the reader containing the
   * XML data.
   *
   * @param reader  the reader for the XML data
   * @throws Exception  if something goes wrong
   */
  public XMLInstances(Reader reader) throws Exception {
    this();
   
    setXML(reader);
  }
 
  /**
   * adds the attribute to the XML structure
   *
   * @param parent  the parent node to add the attribute node as child
   * @param att    the attribute to add
   */
  protected void addAttribute(Element parent, Attribute att) {
    Element    node;
    Element    child;
    Element    property;
    Element    label;
    String    tmpStr;
    Enumeration    enm;
    int      i;
   
    node = m_Document.createElement(TAG_ATTRIBUTE);
    parent.appendChild(node);
   
    // XML attributes
    // name
    node.setAttribute(ATT_NAME, validContent(att.name()));
   
    // type
    switch (att.type()) {
      case Attribute.NUMERIC:
  node.setAttribute(ATT_TYPE, VAL_NUMERIC);
  break;
 
      case Attribute.DATE:
  node.setAttribute(ATT_TYPE, VAL_DATE);
  break;
 
      case Attribute.NOMINAL:
  node.setAttribute(ATT_TYPE, VAL_NOMINAL);
  break;
 
      case Attribute.STRING:
  node.setAttribute(ATT_TYPE, VAL_STRING);
  break;
 
      case Attribute.RELATIONAL:
  node.setAttribute(ATT_TYPE, VAL_RELATIONAL);
  break;
 
      default:
  node.setAttribute(ATT_TYPE, "???");
    }
   
    // labels
    if (att.isNominal()) {
      child = m_Document.createElement(TAG_LABELS);
      node.appendChild(child);
      enm = att.enumerateValues();
      while (enm.hasMoreElements()) {
  tmpStr = enm.nextElement().toString();
  label = m_Document.createElement(TAG_LABEL);
  child.appendChild(label);
  label.appendChild(m_Document.createTextNode(validContent(tmpStr)));
      }
    }
   
    // format
    if (att.isDate())
      node.setAttribute(ATT_FORMAT, validContent(att.getDateFormat()));
   
    // class
    if (m_Instances.classIndex() > -1) {
      if (att == m_Instances.classAttribute())
  node.setAttribute(ATT_CLASS, VAL_YES);
    }
   
    // add meta-data
    if ( (att.getMetadata() != null) && (att.getMetadata().size() > 0) ) {
      child = m_Document.createElement(TAG_METADATA);
      node.appendChild(child);
      enm = att.getMetadata().propertyNames();
      while (enm.hasMoreElements()) {
  tmpStr = enm.nextElement().toString();
  property = m_Document.createElement(TAG_PROPERTY);
  child.appendChild(property);
  property.setAttribute(ATT_NAME, validContent(tmpStr));
  property.appendChild(m_Document.createTextNode(validContent(att.getMetadata().getProperty(tmpStr, ""))));
      }
    }
   
    // relational attribute?
    if (att.isRelationValued()) {
      child = m_Document.createElement(TAG_ATTRIBUTES);
      node.appendChild(child);
      for (i = 0; i < att.relation().numAttributes(); i++)
  addAttribute(child, att.relation().attribute(i));
    }
  }

  /**
   * turns all &lt;, &gt; and &amp;into character entities and returns that
   * string. Necessary for TextNodes.
   *
   * @param content  string to convert
   * @return    the valid content string
   */
  protected String validContent(String content) {
    String  result;
   
    result = content;
   
    // these five entities are recognized by every XML processor
    // see http://www.xml.com/pub/a/2001/03/14/trxml10.html
    result = result.replaceAll("&", "&amp;")
                   .replaceAll("\"", "&quot;")
                   .replaceAll("'", "&apos;")
                   .replaceAll("<", "&lt;")
                   .replaceAll(">", "&gt;");
    // in addition, replace some other entities as well
    result = result.replaceAll("\n", "&#10;")
                   .replaceAll("\r", "&#13;")
                   .replaceAll("\t", "&#9;");
   
    return result;
  }
 
  /**
   * adds the instance to the XML structure
   *
   * @param parent  the parent node to add the instance node as child
   * @param inst  the instance to add
   */
  protected void addInstance(Element parent, Instance inst) {
    Element    node;
    Element    value;
    Element    child;
    boolean    sparse;
    int      i;
    int      n;
    int      index;
   
    node = m_Document.createElement(TAG_INSTANCE);
    parent.appendChild(node);
   
    // sparse?
    sparse = (inst instanceof SparseInstance);
    if (sparse)
      node.setAttribute(ATT_TYPE, VAL_SPARSE);
   
    // weight
    if (inst.weight() != 1.0)
      node.setAttribute(ATT_WEIGHT, Utils.doubleToString(inst.weight(), m_Precision));
   
    // values
    for (i = 0; i < inst.numValues(); i++) {
      index = inst.index(i);
     
      value = m_Document.createElement(TAG_VALUE);
      node.appendChild(value);

      if (inst.isMissing(index)) {
  value.setAttribute(ATT_MISSING, VAL_YES);
      }
      else {
  if (inst.attribute(index).isRelationValued()) {
    child = m_Document.createElement(TAG_INSTANCES);
    value.appendChild(child);
    for (n = 0; n < inst.relationalValue(i).numInstances(); n++)
      addInstance(child, inst.relationalValue(i).instance(n));
  }
  else {
    if (inst.attribute(index).type() == Attribute.NUMERIC)
      value.appendChild(m_Document.createTextNode(Utils.doubleToString(inst.value(index), m_Precision)));
    else
      value.appendChild(m_Document.createTextNode(validContent(inst.stringValue(index))));
  }
      }
     
      if (sparse)
  value.setAttribute(ATT_INDEX, "" + (index+1));
    }
  }
 
  /**
   * generates the XML structure for the header
   */
  protected void headerToXML() {
    Element  root;
    Element  node;
    Element  child;
    int    i;
   
    root = m_Document.getDocumentElement();
    root.setAttribute(ATT_NAME, validContent(m_Instances.relationName()));
    root.setAttribute(ATT_VERSION, Version.VERSION);
   
    // create "header" node
    node = m_Document.createElement(TAG_HEADER);
    root.appendChild(node);

    // add all attributes
    child = m_Document.createElement(TAG_ATTRIBUTES);
    node.appendChild(child);
    for (i = 0; i < m_Instances.numAttributes(); i++)
      addAttribute(child, m_Instances.attribute(i));
  }
 
  /**
   * generates the XML structure from the rows
   */
  protected void dataToXML() {
    Element  root;
    Element  node;
    Element  child;
    int    i;
   
    root = m_Document.getDocumentElement();
   
    // create "body" node
    node = m_Document.createElement(TAG_BODY);
    root.appendChild(node);

    // add all instances
    child = m_Document.createElement(TAG_INSTANCES);
    node.appendChild(child);
    for (i = 0; i < m_Instances.numInstances(); i++)
      addInstance(child, m_Instances.instance(i));
  }
 
  /**
   * builds up the XML structure based on the given data
   *
   * @param data  data to generate the XML from
   */
  public void setInstances(Instances data) {
    m_Instances = new Instances(data);
    clear();
    headerToXML();
    dataToXML();
  }
 
  /**
   * returns the current instances, either the ones that were set or the ones
   * that were generated from the XML structure.
   *
   * @return    the current instances
   */
  public Instances getInstances() {
    return m_Instances;
  }

  /**
   * returns the metadata, if any available underneath this node, otherwise
   * just null
   *
   * @param parent  the attribute node
   * @return    the metadata, or null if none found
   * @throws Exception  if generation fails
   */
  protected ProtectedProperties createMetadata(Element parent) throws Exception {
    ProtectedProperties  result;
    Properties    props;
    Vector    list;
    Element    node;
    Element    metanode;
    int      i;
   
    result = null;
   
    // find metadata node directly underneath this attribute, but not in
    // deeper nested attributes (e.g., within relational attributes)
    metanode = null;
    list     = getChildTags(parent, TAG_METADATA);
    if (list.size() > 0)
      metanode = (Element) list.get(0);
   
    // generate properties
    if (metanode != null) {
      props = new Properties();
      list  = getChildTags(metanode, TAG_PROPERTY);
      for (i = 0; i < list.size(); i++) {
  node = (Element) list.get(i);
  props.setProperty(node.getAttribute(ATT_NAME), getContent(node));
      }
      result = new ProtectedProperties(props);
    }
   
    return result;
  }

  /**
   * returns the labels listed underneath this (nominal) attribute in a
   * FastVector
   *
   * @param parent  the (nominal) attribute node
   * @return    the label vector
   * @throws Exception  if generation fails
   */
  protected FastVector createLabels(Element parent) throws Exception {
    FastVector    result;
    Vector    list;
    Element    node;
    Element    labelsnode;
    int      i;
   
    result = new FastVector();
   
    // find labels node directly underneath this attribute, but not in
    // deeper nested attributes (e.g., within relational attributes)
    labelsnode = null;
    list     = getChildTags(parent, TAG_LABELS);
    if (list.size() > 0)
      labelsnode = (Element) list.get(0);
   
    // retrieve all labels
    if (labelsnode != null) {
      list  = getChildTags(labelsnode, TAG_LABEL);
      for (i = 0; i < list.size(); i++) {
  node = (Element) list.get(i);
  result.addElement(getContent(node));
      }
    }
   
    return result;
  }
 
  /**
   * creates an Attribute from the given XML node
   *
   * @param node  the node with the setup
   * @return    the configured Attribute
   * @throws Exception  if generation fails, e.g., due to unknown attribute type
   */
  protected Attribute createAttribute(Element node) throws Exception {
    String    typeStr;
    String    name;
    int      type;
    Attribute    result;
    FastVector    values;
    ProtectedProperties  metadata;
    Vector    list;
    FastVector    atts;
   
    result = null;
   
    // name
    name = node.getAttribute(ATT_NAME);

    // type
    typeStr = node.getAttribute(ATT_TYPE);
    if (typeStr.equals(VAL_NUMERIC))
      type = Attribute.NUMERIC;
    else if (typeStr.equals(VAL_DATE))
      type = Attribute.DATE;
    else if (typeStr.equals(VAL_NOMINAL))
      type = Attribute.NOMINAL;
    else if (typeStr.equals(VAL_STRING))
      type = Attribute.STRING;
    else if (typeStr.equals(VAL_RELATIONAL))
      type = Attribute.RELATIONAL;
    else
      throw new Exception(
    "Attribute type '" + typeStr + "' is not supported!");

    // metadata
    metadata = createMetadata(node);
   
    switch (type) {
      case Attribute.NUMERIC:
  if (metadata == null)
    result = new Attribute(name);
  else
    result = new Attribute(name, metadata);
  break;

      case Attribute.DATE:
  if (metadata == null)
    result = new Attribute(name, node.getAttribute(ATT_FORMAT));
  else
    result = new Attribute(name, node.getAttribute(ATT_FORMAT), metadata);
  break;
 
      case Attribute.NOMINAL:
  values = createLabels(node);
  if (metadata == null)
    result = new Attribute(name, values);
  else
    result = new Attribute(name, values, metadata);
  break;
 
      case Attribute.STRING:
  if (metadata == null)
    result = new Attribute(name, (FastVector) null);
  else
    result = new Attribute(name, (FastVector) null, metadata);
  break;
 
      case Attribute.RELATIONAL:
  list = getChildTags(node, TAG_ATTRIBUTES);
  node = (Element) list.get(0);
  atts = createAttributes(node, new int[1]);
  if (metadata == null)
    result = new Attribute(name, new Instances(name, atts, 0));
  else
    result = new Attribute(name, new Instances(name, atts, 0), metadata);
  break;
    }
   
    return result;
  }
 
  /**
   * returns a list of generated attributes
   *
   * @param parent  the attributes node
   * @param classIndex  array of length 1 to return the class index, if any
   * @return    the vector with the generated attributes
   * @throws Exception  if generation fails, e.g., due to unknown attribute type
   */
  protected FastVector createAttributes(Element parent, int[] classIndex) throws Exception {
    Vector  list;
    FastVector  result;
    int    i;
    Element  node;
    Attribute  att;

    result        = new FastVector();
    classIndex[0] = -1;
   
    list = getChildTags(parent, TAG_ATTRIBUTE);
    for (i = 0; i < list.size(); i++) {
      node = (Element) list.get(i);
      att = createAttribute(node);
      if (node.getAttribute(ATT_CLASS).equals(VAL_YES))
  classIndex[0] = i;
      result.addElement(att);
    }
   
    return result;
  }
 
  /**
   * creates an Instance from the given XML node
   *
   * @param header  the data this instance will belong to
   * @param parent  the instance node
   * @return    the configured Instance
   * @throws Exception  if generation fails, e.g., due to unknown attribute type
   */
  protected Instance createInstance(Instances header, Element parent) throws Exception {
    Instance  result;
    Element  node;
    Element  child;
    boolean  sparse;
    int    i;
    int    index;
    Vector  list;
    Vector  subList;
    double[]  values;
    String  content;
    double  weight;
    Instances  data;
   
    result = null;

    // sparse?
    sparse = (parent.getAttribute(ATT_TYPE).equals(VAL_SPARSE));
    values = new double[header.numAttributes()];
   
    // weight
    if (parent.getAttribute(ATT_WEIGHT).length() != 0)
      weight = Double.parseDouble(parent.getAttribute(ATT_WEIGHT));
    else
      weight = 1.0;
   
    list = getChildTags(parent, TAG_VALUE);
    for (i = 0; i < list.size(); i++) {
      node = (Element) list.get(i);
     
      // determine index
      if (sparse)
  index = Integer.parseInt(node.getAttribute(ATT_INDEX)) - 1;
      else
  index = i;

      // set value
      if (node.getAttribute(ATT_MISSING).equals(VAL_YES)) {
  values[index] = Instance.missingValue();
      }
      else {
  content = getContent(node);
  switch (header.attribute(index).type()) {
    case Attribute.NUMERIC:
      values[index] = Double.parseDouble(content);
      break;
     
    case Attribute.DATE:
      values[index] = header.attribute(index).parseDate(content);
      break;
     
    case Attribute.NOMINAL:
      values[index] = header.attribute(index).indexOfValue(content);
      break;
     
    case Attribute.STRING:
      values[index] = header.attribute(index).addStringValue(content);
      break;
     
    case Attribute.RELATIONAL:
      subList       = getChildTags(node, TAG_INSTANCES);
      child         = (Element) subList.get(0);
      data          = createInstances(header.attribute(index).relation(), child);
      values[index] = header.attribute(index).addRelation(data);
      break;
     
    default:
      throw new Exception(
    "Attribute type " + header.attribute(index).type()
    + " is not supported!")
  }
      }
    }
   
    // create instance
    if (sparse)
      result = new SparseInstance(weight, values);
    else
      result = new Instance(weight, values);
   
    return result;
  }
 
  /**
   * creates Instances from the given XML node
   *
   * @param header  the header of this data
   * @param parent  the instances node
   * @return    the generated Instances
   * @throws Exception  if generation fails, e.g., due to unknown attribute type
   */
  protected Instances createInstances(Instances header, Element parent) throws Exception {
    Instances  result;
    Vector  list;
    int    i;
   
    result = new Instances(header, 0);
   
    list = getChildTags(parent, TAG_INSTANCE);
    for (i = 0; i < list.size(); i++)
      result.add(createInstance(result, (Element) list.get(i)));
   
    return result;
  }
 
  /**
   * generates the header from the XML document
   *
   * @return    the generated header
   * @throws Exception  if generation fails
   */
  protected Instances headerFromXML() throws Exception {
    Instances  result;
    Element  root;
    Element  node;
    Vector  list;
    FastVector  atts;
    Version  version;
    int[]  classIndex;

    root = m_Document.getDocumentElement();
   
    // check version
    version = new Version();
    if (version.isOlder(root.getAttribute(ATT_VERSION)))
      System.out.println(
    "WARNING: loading data of version " + root.getAttribute(ATT_VERSION)
    + " with version " + Version.VERSION);
   
    // attributes
    list       = getChildTags(root, TAG_HEADER);
    node       = (Element) list.get(0);
    list       = getChildTags(node, TAG_ATTRIBUTES);
    node       = (Element) list.get(0);
    classIndex = new int[1];
    atts       = createAttributes(node, classIndex);

    // generate header
    result = new Instances(root.getAttribute(ATT_NAME), atts, 0);
    result.setClassIndex(classIndex[0]);
   
    return result;
  }
 
  /**
   * generates the complete dataset from the XML document
   *
   * @param header  the header structure
   * @return    the complete dataset
   * @throws Exception  if generation fails
   */
  protected Instances dataFromXML(Instances header) throws Exception {
    Instances  result;
    Element  node;
    Vector  list;

    list   = getChildTags(m_Document.getDocumentElement(), TAG_BODY);
    node   = (Element) list.get(0);
    list   = getChildTags(node, TAG_INSTANCES);
    node   = (Element) list.get(0);
    result = createInstances(header, node);
   
    return result;
  }
 
  /**
   * reads the XML structure from the given reader
   *
   * @param reader  the reader to get the XML from
   * @throws Exception  if
   */
  public void setXML(Reader reader) throws Exception {
    read(reader);
   
    // interprete XML structure
    m_Instances = dataFromXML(headerFromXML());
  }
 
  /**
   * Returns the revision string.
   *
   * @return    the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 1.4 $");
  }
 
  /**
   * takes an XML document as first argument and then outputs the Instances
   * statistics
   *
   * @param args  the commandline options
   */
  public static void main(String[] args) {
    try {
      Reader r = null;
      if (args.length != 1) {
  throw (new Exception("Usage: XMLInstances <filename>"));
      }
      else {
  InputStream in = new FileInputStream(args[0]);
  // compressed file?
  if (args[0].endsWith(".gz"))
    in = new GZIPInputStream(in);
        r = new BufferedReader(new InputStreamReader(in));
     }
     
      if (args[0].endsWith(Instances.FILE_EXTENSION)) {
  XMLInstances i = new XMLInstances(new Instances(r));
  System.out.println(i.toString());
      }
      else {
  Instances i = new XMLInstances(r).getInstances();
  System.out.println(i.toSummaryString());
      }
    }
    catch (Exception ex) {
      ex.printStackTrace();
      System.err.println(ex.getMessage());
    }
  }
}
TOP

Related Classes of weka.core.xml.XMLInstances

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.