Package weka.core

Examples of weka.core.Instances


   * @param inputFormat     the input format to base the output format on
   * @return                the output format
   * @throws Exception      in case the determination goes wrong
   */
  protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
    Instances      result;
    Attribute      att;
    ArrayList<Attribute>  atts;
    int        i;
   
    m_AttributeIndices.setUpper(inputFormat.numAttributes() - 1);
   
    // generate new header
    atts = new ArrayList<Attribute>();
    for (i = 0; i < inputFormat.numAttributes(); i++) {
      att = inputFormat.attribute(i);
      if (m_AttributeIndices.isInRange(i)) {
  if (m_ReplaceAll)
    atts.add(att.copy(att.name().replaceAll(m_Find, m_Replace)));
  else
    atts.add(att.copy(att.name().replaceFirst(m_Find, m_Replace)));
      }
      else {
  atts.add((Attribute) att.copy());
      }
    }
    result = new Instances(inputFormat.relationName(), atts, 0);
    result.setClassIndex(inputFormat.classIndex());
   
    return result;
  }
View Full Code Here


    int i, j;
    Random Rnd = new Random(m_seed);
    Remove delTransform = new Remove();
    delTransform.setInvertSelection(true);
    // copy the instances
    Instances trainCopy = new Instances(m_trainInstances);

    // count attributes set in the BitSet
    for (i = 0; i < m_numAttribs; i++) {
      if (subset.get(i)) {
        numAttributes++;
View Full Code Here

    }
    if (m_removeFilter == null) {

      // establish attributes to remove from first batch

      Instances toFilter = getInputFormat();
      int[] attsToDelete = new int[toFilter.numAttributes()];
      int numToDelete = 0;
      for(int i = 0; i < toFilter.numAttributes(); i++) {
  if (i==toFilter.classIndex()) continue; // skip class
  AttributeStats stats = toFilter.attributeStats(i);
  if (stats.distinctCount < 2) {
    // remove constant attributes
    attsToDelete[numToDelete++] = i;
  } else if (toFilter.attribute(i).isNominal()) {
    // remove nominal attributes that vary too much
    double variancePercent = (double) stats.distinctCount
      / (double)(stats.totalCount - stats.missingCount) * 100.0;
    if (variancePercent > m_maxVariancePercentage) {
        attsToDelete[numToDelete++] = i;
    }
  }
      }
     
      int[] finalAttsToDelete = new int[numToDelete];
      System.arraycopy(attsToDelete, 0, finalAttsToDelete, 0, numToDelete);
     
      m_removeFilter = new Remove();
      m_removeFilter.setAttributeIndicesArray(finalAttsToDelete);
      m_removeFilter.setInvertSelection(false);
      m_removeFilter.setInputFormat(toFilter);
     
      for (int i = 0; i < toFilter.numInstances(); i++) {
  m_removeFilter.input(toFilter.instance(i));
      }
      m_removeFilter.batchFinished();

      Instance processed;
      Instances outputDataset = m_removeFilter.getOutputFormat();
   
      // restore old relation name to hide attribute filter stamp
      outputDataset.setRelationName(toFilter.relationName());
   
      setOutputFormat(outputDataset);
      while ((processed = m_removeFilter.output()) != null) {
  processed.setDataset(outputDataset);
  push(processed);
View Full Code Here

    // can classifier handle the data?
    getCapabilities().testWithFail(instances);

    // remove instances with missing class
    Instances data = new Instances(instances);
    data.deleteWithMissingClass();

    // only class? -> build ZeroR model
    if (data.numAttributes() == 1) {
      System.err.println(
    "Cannot build model (only class attribute present in data!), "
    + "using ZeroR model instead!");
      m_ZeroR = new weka.classifiers.rules.ZeroR();
      m_ZeroR.buildClassifier(data);
View Full Code Here

   * @param instanceInfo  the data to test
   * @throws Exception    if the test fails
   */
  protected void testInputFormat(Instances instanceInfo) throws Exception {
    for (int i = 0; i < getRanges().length; i++) {
      Instances newi = new Instances(instanceInfo, 0);
      if (instanceInfo.size() > 0){
  newi.add((Instance)instanceInfo.get(0).copy());
      }
      Range range = getRanges()[i];
      range.setUpper(instanceInfo.numAttributes() - 1);
      Instances subset = generateSubset(newi, range);
      getFilters()[i].setInputFormat(subset);
    }
  }
View Full Code Here

   * @throws Exception  if creation fails
   */
  protected Instances generateSubset(Instances data, Range range) throws Exception {
    Remove    filter;
    StringBuilder  atts;
    Instances    result;
    int[]    indices;
    int      i;

    // determine attributes
    indices = range.getSelection();
View Full Code Here

   * @param prefix  the prefix for the attributes
   * @return    a copy of the data with the attributes renamed
   * @throws Exception  if renaming fails
   */
  protected Instances renameAttributes(Instances data, String prefix) throws Exception {
    Instances      result;
    int        i;
    ArrayList<Attribute>  atts;

    // rename attributes
    atts = new ArrayList<Attribute>();
    for (i = 0; i < data.numAttributes(); i++) {
      if (i == data.classIndex())
  atts.add((Attribute) data.attribute(i).copy());
      else
  atts.add(data.attribute(i).copy(prefix + data.attribute(i).name()));
    }

    // create new dataset
    result = new Instances(data.relationName(), atts, data.numInstances());
    for (i = 0; i < data.numInstances(); i++) {
      result.add((Instance) data.instance(i).copy());
    }

    // set class if present
    if (data.classIndex() > -1)
      result.setClassIndex(data.classIndex());

    return result;
  }
View Full Code Here

   * @throws Exception      in case the determination goes wrong
   * @see                   #hasImmediateOutputFormat()
   * @see                   #batchFinished()
   */
  protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
    Instances       result;
    Instances      processed;
    int             i;
    int        n;
    ArrayList<Attribute>  atts;
    Attribute      att;

    if (!isFirstBatchDone()) {
      // we need the full dataset here, see process(Instances)
      if (inputFormat.numInstances() == 0)
  return null;

      checkDimensions();

      // determine unused indices
      determineUnusedIndices(inputFormat);

      atts = new ArrayList<Attribute>();
      for (i = 0; i < getFilters().length; i++) {
  if (!isFirstBatchDone()) {
    // generate subset
    processed = generateSubset(inputFormat, getRange(i));
    // set input format
    if (!getFilter(i).setInputFormat(processed))
      Filter.useFilter(processed, getFilter(i));
  }

  // get output format
  processed = getFilter(i).getOutputFormat();

  // rename attributes
  processed = renameAttributes(processed, "filtered-" + i + "-");

  // add attributes
  for (n = 0; n < processed.numAttributes(); n++) {
    if (n == processed.classIndex())
      continue;
    atts.add((Attribute) processed.attribute(n).copy());
  }
      }

      // add unused attributes
      if (!getRemoveUnused()) {
  for (i = 0; i < m_IndicesUnused.length; i++) {
    att = inputFormat.attribute(m_IndicesUnused[i]);
    atts.add(att.copy("unfiltered-" + att.name()));
  }
      }

      // add class if present
      if (inputFormat.classIndex() > -1)
  atts.add((Attribute) inputFormat.classAttribute().copy());

      // generate new dataset
      result = new Instances(inputFormat.relationName(), atts, 0);
      if (inputFormat.classIndex() > -1)
  result.setClassIndex(result.numAttributes() - 1);
    }
    else {
      result = getOutputFormat();
View Full Code Here

   * @return            the modified data
   * @throws Exception  in case the processing goes wrong
   * @see               #batchFinished()
   */
  protected Instances process(Instances instances) throws Exception {
    Instances    result;
    int            i;
    int      n;
    int      m;
    int      index;
    Instances[]    processed;
    Instance    inst;
    Instance    newInst;
    double[]    values;
    Vector    errors;

    if (!isFirstBatchDone()) {
      checkDimensions();

      // set upper limits
      for (i = 0; i < m_Ranges.length; i++)
  m_Ranges[i].setUpper(instances.numAttributes() - 1);

      // determine unused indices
      determineUnusedIndices(instances);
    }

    // pass data through all filters
    processed = new Instances[getFilters().length];
    for (i = 0; i < getFilters().length; i++) {
      processed[i] = generateSubset(instances, getRange(i));
      if (!isFirstBatchDone())
  getFilter(i).setInputFormat(processed[i]);
      processed[i] = Filter.useFilter(processed[i], getFilter(i));
    }

    // set output format (can only be determined with full dataset, hence here)
    if (!isFirstBatchDone()) {
      result = determineOutputFormat(instances);
      setOutputFormat(result);
    }
    else {
      result = getOutputFormat();
    }

    // check whether all filters didn't change the number of instances
    errors = new Vector();
    for (i = 0; i < processed.length; i++) {
      if (processed[i].numInstances() != instances.numInstances())
  errors.add(new Integer(i));
    }
    if (errors.size() > 0)
      throw new IllegalStateException(
    "The following filter(s) changed the number of instances: " + errors);

    // assemble data
    for (i = 0; i < instances.numInstances(); i++) {
      inst   = instances.instance(i);
      values = new double[result.numAttributes()];

      // filtered data
      index = 0;
      for (n = 0; n < processed.length; n++) {
  for (m = 0; m < processed[n].numAttributes(); m++) {
    if (m == processed[n].classIndex())
      continue;
    values[index] = processed[n].instance(i).value(m);
    index++;
  }
      }

      // unused attributes
      if (!getRemoveUnused()) {
  for (n = 0; n < m_IndicesUnused.length; n++) {
    values[index] = inst.value(m_IndicesUnused[n]);
    index++;
  }
      }

      // class
      if (instances.classIndex() > -1)
  values[values.length - 1] = inst.value(instances.classIndex());

      // generate and add instance
      if (inst instanceof SparseInstance)
  newInst = new SparseInstance(instances.instance(i).weight(), values);
      else
  newInst = new DenseInstance(instances.instance(i).weight(), values);
      result.add(newInst);
    }

    return result;
  }
View Full Code Here

   * @see     #setPriors(Instances)
   */
  public Evaluation(Instances data, CostMatrix costMatrix)
  throws Exception {

    m_Header = new Instances(data, 0);
    m_NumClasses = data.numClasses();
    m_NumFolds = 1;
    m_ClassIsNominal = data.classAttribute().isNominal();

    if (m_ClassIsNominal) {
View Full Code Here

TOP

Related Classes of weka.core.Instances

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.