Examples of Instances

co.cask.cdap.proto.Instances
Represents the number of instances a program is currently running with.
de.danielbechler.diff.access.Instances
@author Daniel Bechler
org.apache.karaf.instance.core.internal.Instances
org.integratedmodelling.riskwiz.learning.data.Instances
all the instances in the file (ARFF, CSV, XRFF, ...) DataSource source = new DataSource(filename); Instances instances = source.getDataSet(); // Make the last attribute be the class instances.setClassIndex(instances.numAttributes() - 1); // Print header and instances. System.out.println("\nDataset:\n"); System.out.println(instances); ...
All methods that change a set of instances are safe, i.e., a change of a set of instances does not affect any other sets of instances. All methods that change a dataset's attribute information clone the dataset before it is changed. @author Eibe Frank (eibe@cs.waikato.ac.nz) @author Len Trigg (trigg@cs.waikato.ac.nz) @author FracPete (fracpete at waikato dot ac dot nz) @version $Revision: 1.73 $
weka.core.Instances
all the instances in the file (ARFF, CSV, XRFF, ...) DataSource source = new DataSource(filename); Instances instances = source.getDataSet(); // Make the last attribute be the class instances.setClassIndex(instances.numAttributes() - 1); // Print header and instances. System.out.println("\nDataset:\n"); System.out.println(instances); ...
All methods that change a set of instances are safe, i.e., a change of a set of instances does not affect any other sets of instances. All methods that change a dataset's attribute information clone the dataset before it is changed. @author Eibe Frank (eibe@cs.waikato.ac.nz) @author Len Trigg (trigg@cs.waikato.ac.nz) @author FracPete (fracpete at waikato dot ac dot nz) @version $Revision: 6996 $

Examples of weka.core.Instances

   * @param prefix  the prefix for the attributes
   * @return    a copy of the data with the attributes renamed
   * @throws Exception  if renaming fails
   */
  protected Instances renameAttributes(Instances data, String prefix) throws Exception {
    Instances      result;
    int        i;
    ArrayList<Attribute>  atts;


    // rename attributes
    atts = new ArrayList<Attribute>();
    for (i = 0; i < data.numAttributes(); i++) {
      if (i == data.classIndex())
  atts.add((Attribute) data.attribute(i).copy());
      else
  atts.add(data.attribute(i).copy(prefix + data.attribute(i).name()));
    }


    // create new dataset
    result = new Instances(data.relationName(), atts, data.numInstances());
    for (i = 0; i < data.numInstances(); i++) {
      result.add((Instance) data.instance(i).copy());
    }


    // set class if present
    if (data.classIndex() > -1)
      result.setClassIndex(data.classIndex());


    return result;
  }

View Full Code Here

Examples of weka.core.Instances

   * @throws Exception      in case the determination goes wrong
   * @see                   #hasImmediateOutputFormat()
   * @see                   #batchFinished()
   */
  protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
    Instances       result;
    Instances      processed;
    int             i;
    int        n;
    ArrayList<Attribute>  atts;
    Attribute      att;


    if (!isFirstBatchDone()) {
      // we need the full dataset here, see process(Instances)
      if (inputFormat.numInstances() == 0)
  return null;


      checkDimensions();


      // determine unused indices
      determineUnusedIndices(inputFormat);


      atts = new ArrayList<Attribute>();
      for (i = 0; i < getFilters().length; i++) {
  if (!isFirstBatchDone()) {
    // generate subset
    processed = generateSubset(inputFormat, getRange(i));
    // set input format
    if (!getFilter(i).setInputFormat(processed))
      Filter.useFilter(processed, getFilter(i));
  }


  // get output format
  processed = getFilter(i).getOutputFormat();


  // rename attributes
  processed = renameAttributes(processed, "filtered-" + i + "-");


  // add attributes
  for (n = 0; n < processed.numAttributes(); n++) {
    if (n == processed.classIndex())
      continue;
    atts.add((Attribute) processed.attribute(n).copy());
  }
      }


      // add unused attributes
      if (!getRemoveUnused()) {
  for (i = 0; i < m_IndicesUnused.length; i++) {
    att = inputFormat.attribute(m_IndicesUnused[i]);
    atts.add(att.copy("unfiltered-" + att.name()));
  }
      }


      // add class if present
      if (inputFormat.classIndex() > -1)
  atts.add((Attribute) inputFormat.classAttribute().copy());


      // generate new dataset
      result = new Instances(inputFormat.relationName(), atts, 0);
      if (inputFormat.classIndex() > -1)
  result.setClassIndex(result.numAttributes() - 1);
    }
    else {
      result = getOutputFormat();

View Full Code Here

Examples of weka.core.Instances

   * @return            the modified data
   * @throws Exception  in case the processing goes wrong
   * @see               #batchFinished()
   */
  protected Instances process(Instances instances) throws Exception {
    Instances    result;
    int            i;
    int      n;
    int      m;
    int      index;
    Instances[]    processed;
    Instance    inst;
    Instance    newInst;
    double[]    values;
    Vector    errors;


    if (!isFirstBatchDone()) {
      checkDimensions();


      // set upper limits
      for (i = 0; i < m_Ranges.length; i++)
  m_Ranges[i].setUpper(instances.numAttributes() - 1);


      // determine unused indices
      determineUnusedIndices(instances);
    }


    // pass data through all filters
    processed = new Instances[getFilters().length];
    for (i = 0; i < getFilters().length; i++) {
      processed[i] = generateSubset(instances, getRange(i));
      if (!isFirstBatchDone())
  getFilter(i).setInputFormat(processed[i]);
      processed[i] = Filter.useFilter(processed[i], getFilter(i));
    }


    // set output format (can only be determined with full dataset, hence here)
    if (!isFirstBatchDone()) {
      result = determineOutputFormat(instances);
      setOutputFormat(result);
    }
    else {
      result = getOutputFormat();
    }


    // check whether all filters didn't change the number of instances
    errors = new Vector();
    for (i = 0; i < processed.length; i++) {
      if (processed[i].numInstances() != instances.numInstances())
  errors.add(new Integer(i));
    }
    if (errors.size() > 0)
      throw new IllegalStateException(
    "The following filter(s) changed the number of instances: " + errors);


    // assemble data
    for (i = 0; i < instances.numInstances(); i++) {
      inst   = instances.instance(i);
      values = new double[result.numAttributes()];


      // filtered data
      index = 0;
      for (n = 0; n < processed.length; n++) {
  for (m = 0; m < processed[n].numAttributes(); m++) {
    if (m == processed[n].classIndex())
      continue;
    values[index] = processed[n].instance(i).value(m);
    index++;
  }
      }


      // unused attributes
      if (!getRemoveUnused()) {
  for (n = 0; n < m_IndicesUnused.length; n++) {
    values[index] = inst.value(m_IndicesUnused[n]);
    index++;
  }
      }


      // class
      if (instances.classIndex() > -1)
  values[values.length - 1] = inst.value(instances.classIndex());


      // generate and add instance
      if (inst instanceof SparseInstance)
  newInst = new SparseInstance(instances.instance(i).weight(), values);
      else
  newInst = new DenseInstance(instances.instance(i).weight(), values);
      result.add(newInst);
    }


    return result;
  }

View Full Code Here

Examples of weka.core.Instances

   * @see     #setPriors(Instances)
   */
  public Evaluation(Instances data, CostMatrix costMatrix)
  throws Exception {


    m_Header = new Instances(data, 0);
    m_NumClasses = data.numClasses();
    m_NumFolds = 1;
    m_ClassIsNominal = data.classAttribute().isNominal();


    if (m_ClassIsNominal) {

View Full Code Here

Examples of weka.core.Instances

    // Check if any predictions have been collected
    if (m_Predictions == null) {
      return Utils.missingValue();
    } else {
      ThresholdCurve tc = new ThresholdCurve();
      Instances result = tc.getCurve(m_Predictions, classIndex);
      return ThresholdCurve.getROCArea(result);
    }
  }

View Full Code Here

Examples of weka.core.Instances

                                 Instances data, int numFolds, Random random,
                                 Object... forPredictionsPrinting)
  throws Exception {


    // Make a copy of the data we can reorder
    data = new Instances(data);
    data.randomize(random);
    if (data.classAttribute().isNominal()) {
      data.stratify(numFolds);
    }


    // We assume that the first element is a
    // weka.classifiers.evaluation.output.prediction.AbstractOutput object
    AbstractOutput classificationOutput = null;
    if (forPredictionsPrinting.length > 0) {
      // print the header first
      classificationOutput = (AbstractOutput) forPredictionsPrinting[0];
      classificationOutput.setHeader(data);
      classificationOutput.printHeader();
    }


    // Do the folds
    for (int i = 0; i < numFolds; i++) {
      Instances train = data.trainCV(numFolds, i, random);
      setPriors(train);
      Classifier copiedClassifier = AbstractClassifier.makeCopy(classifier);
      copiedClassifier.buildClassifier(train);
      Instances test = data.testCV(numFolds, i);
      evaluateModel(copiedClassifier, test, forPredictionsPrinting);
    }
    m_NumFolds = numFolds;


    if (classificationOutput != null)

View Full Code Here

Examples of weka.core.Instances

   * @return a string describing the results
   */
  public static String evaluateModel(Classifier classifier,
      String [] options) throws Exception {


    Instances train = null, tempTrain, test = null, template = null;
    int seed = 1, folds = 10, classIndex = -1;
    boolean noCrossValidation = false;
    String trainFileName, testFileName, sourceClass,
    classIndexString, seedString, foldsString, objectInputFileName,
    objectOutputFileName;
    boolean noOutput = false,
    trainStatistics = true,
    printMargins = false, printComplexityStatistics = false,
    printGraph = false, classStatistics = false, printSource = false;
    StringBuffer text = new StringBuffer();
    DataSource trainSource = null, testSource = null;
    ObjectInputStream objectInputStream = null;
    BufferedInputStream xmlInputStream = null;
    CostMatrix costMatrix = null;
    StringBuffer schemeOptionsText = null;
    long trainTimeStart = 0, trainTimeElapsed = 0,
    testTimeStart = 0, testTimeElapsed = 0;
    String xml = "";
    String[] optionsTmp = null;
    Classifier classifierBackup;
    Classifier classifierClassifications = null;
    int actualClassIndex = -1;  // 0-based class index
    String splitPercentageString = "";
    double splitPercentage = -1;
    boolean preserveOrder = false;
    boolean trainSetPresent = false;
    boolean testSetPresent = false;
    String thresholdFile;
    String thresholdLabel;
    StringBuffer predsBuff = null; // predictions from cross-validation
    AbstractOutput classificationOutput = null;


    // help requested?
    if (Utils.getFlag("h", options) || Utils.getFlag("help", options)) {


      // global info requested as well?
      boolean globalInfo = Utils.getFlag("synopsis", options) ||
        Utils.getFlag("info", options);


      throw new Exception("\nHelp requested."
          + makeOptionString(classifier, globalInfo));
    }


    try {
      // do we get the input from XML instead of normal parameters?
      xml = Utils.getOption("xml", options);
      if (!xml.equals(""))
        options = new XMLOptions(xml).toArray();


      // is the input model only the XML-Options, i.e. w/o built model?
      optionsTmp = new String[options.length];
      for (int i = 0; i < options.length; i++)
        optionsTmp[i] = options[i];


      String tmpO = Utils.getOption('l', optionsTmp);
      //if (Utils.getOption('l', optionsTmp).toLowerCase().endsWith(".xml")) {
      if (tmpO.endsWith(".xml")) {
        // try to load file as PMML first
        boolean success = false;
        try {
          PMMLModel pmmlModel = PMMLFactory.getPMMLModel(tmpO);
          if (pmmlModel instanceof PMMLClassifier) {
            classifier = ((PMMLClassifier)pmmlModel);
            success = true;
          }
        } catch (IllegalArgumentException ex) {
          success = false;
        }
        if (!success) {
          // load options from serialized data  ('-l' is automatically erased!)
          XMLClassifier xmlserial = new XMLClassifier();
          OptionHandler cl = (OptionHandler) xmlserial.read(Utils.getOption('l', options));


          // merge options
          optionsTmp = new String[options.length + cl.getOptions().length];
          System.arraycopy(cl.getOptions(), 0, optionsTmp, 0, cl.getOptions().length);
          System.arraycopy(options, 0, optionsTmp, cl.getOptions().length, options.length);
          options = optionsTmp;
        }
      }


      noCrossValidation = Utils.getFlag("no-cv", options);
      // Get basic options (options the same for all schemes)
      classIndexString = Utils.getOption('c', options);
      if (classIndexString.length() != 0) {
        if (classIndexString.equals("first"))
          classIndex = 1;
        else if (classIndexString.equals("last"))
          classIndex = -1;
        else
          classIndex = Integer.parseInt(classIndexString);
      }
      trainFileName = Utils.getOption('t', options);
      objectInputFileName = Utils.getOption('l', options);
      objectOutputFileName = Utils.getOption('d', options);
      testFileName = Utils.getOption('T', options);
      foldsString = Utils.getOption('x', options);
      if (foldsString.length() != 0) {
        folds = Integer.parseInt(foldsString);
      }
      seedString = Utils.getOption('s', options);
      if (seedString.length() != 0) {
        seed = Integer.parseInt(seedString);
      }
      if (trainFileName.length() == 0) {
        if (objectInputFileName.length() == 0) {
          throw new Exception("No training file and no object input file given.");
        }
        if (testFileName.length() == 0) {
          throw new Exception("No training file and no test file given.");
        }
      } else if ((objectInputFileName.length() != 0) &&
          ((!(classifier instanceof UpdateableClassifier)) ||
           (testFileName.length() == 0))) {
        throw new Exception("Classifier not incremental, or no " +
            "test file provided: can't "+
            "use both train and model file.");
      }
      try {
        if (trainFileName.length() != 0) {
          trainSetPresent = true;
          trainSource = new DataSource(trainFileName);
        }
        if (testFileName.length() != 0) {
          testSetPresent = true;
          testSource = new DataSource(testFileName);
        }
        if (objectInputFileName.length() != 0) {
          if (objectInputFileName.endsWith(".xml")) {
            // if this is the case then it means that a PMML classifier was
            // successfully loaded earlier in the code
            objectInputStream = null;
            xmlInputStream = null;
          } else {
            InputStream is = new FileInputStream(objectInputFileName);
            if (objectInputFileName.endsWith(".gz")) {
              is = new GZIPInputStream(is);
            }
            // load from KOML?
            if (!(objectInputFileName.endsWith(".koml") && KOML.isPresent()) ) {
              objectInputStream = new ObjectInputStream(is);
              xmlInputStream    = null;
            }
            else {
              objectInputStream = null;
              xmlInputStream    = new BufferedInputStream(is);
            }
          }
        }
      } catch (Exception e) {
        throw new Exception("Can't open file " + e.getMessage() + '.');
      }
      if (testSetPresent) {
        template = test = testSource.getStructure();
        if (classIndex != -1) {
          test.setClassIndex(classIndex - 1);
        } else {
          if ( (test.classIndex() == -1) || (classIndexString.length() != 0) )
            test.setClassIndex(test.numAttributes() - 1);
        }
        actualClassIndex = test.classIndex();
      }
      else {
        // percentage split
        splitPercentageString = Utils.getOption("split-percentage", options);
        if (splitPercentageString.length() != 0) {
          if (foldsString.length() != 0)
            throw new Exception(
                "Percentage split cannot be used in conjunction with "
                + "cross-validation ('-x').");
          splitPercentage = Double.parseDouble(splitPercentageString);
          if ((splitPercentage <= 0) || (splitPercentage >= 100))
            throw new Exception("Percentage split value needs be >0 and <100.");
        }
        else {
          splitPercentage = -1;
        }
        preserveOrder = Utils.getFlag("preserve-order", options);
        if (preserveOrder) {
          if (splitPercentage == -1)
            throw new Exception("Percentage split ('-percentage-split') is missing.");
        }
        // create new train/test sources
        if (splitPercentage > 0) {
          testSetPresent = true;
          Instances tmpInst = trainSource.getDataSet(actualClassIndex);
          if (!preserveOrder)
            tmpInst.randomize(new Random(seed));
          int trainSize = 
            (int) Math.round(tmpInst.numInstances() * splitPercentage / 100);
          int testSize  = tmpInst.numInstances() - trainSize;
          Instances trainInst = new Instances(tmpInst, 0, trainSize);
          Instances testInst  = new Instances(tmpInst, trainSize, testSize);
          trainSource = new DataSource(trainInst);
          testSource  = new DataSource(testInst);
          template = test = testSource.getStructure();
          if (classIndex != -1) {
            test.setClassIndex(classIndex - 1);
          } else {
            if ( (test.classIndex() == -1) || (classIndexString.length() != 0) )
              test.setClassIndex(test.numAttributes() - 1);
          }
          actualClassIndex = test.classIndex();
        }
      }
      if (trainSetPresent) {
        template = train = trainSource.getStructure();
        if (classIndex != -1) {
          train.setClassIndex(classIndex - 1);
        } else {
          if ( (train.classIndex() == -1) || (classIndexString.length() != 0) )
            train.setClassIndex(train.numAttributes() - 1);
        }
        actualClassIndex = train.classIndex();
        if (!(classifier instanceof weka.classifiers.misc.InputMappedClassifier)) {
          if ((testSetPresent) && !test.equalHeaders(train)) {
            throw new IllegalArgumentException("Train and test file not compatible!\n" + test.equalHeadersMsg(train));
          }
        }
      }
      if (template == null) {
        throw new Exception("No actual dataset provided to use as template");
      }
      costMatrix = handleCostOption(
          Utils.getOption('m', options), template.numClasses());


      classStatistics = Utils.getFlag('i', options);
      noOutput = Utils.getFlag('o', options);
      trainStatistics = !Utils.getFlag('v', options);
      printComplexityStatistics = Utils.getFlag('k', options);
      printMargins = Utils.getFlag('r', options);
      printGraph = Utils.getFlag('g', options);
      sourceClass = Utils.getOption('z', options);
      printSource = (sourceClass.length() != 0);
      thresholdFile = Utils.getOption("threshold-file", options);
      thresholdLabel = Utils.getOption("threshold-label", options);


      String classifications = Utils.getOption("classifications", options);
      String classificationsOld = Utils.getOption("p", options);
      if (classifications.length() > 0) {
        noOutput = true;
        classificationOutput = AbstractOutput.fromCommandline(classifications);
        classificationOutput.setHeader(template);
      }
      // backwards compatible with old "-p range" and "-distribution" options
      else if (classificationsOld.length() > 0) {
        noOutput = true;
        classificationOutput = new PlainText();
        classificationOutput.setHeader(template);
        if (!classificationsOld.equals("0"))
          classificationOutput.setAttributes(classificationsOld);
        classificationOutput.setOutputDistribution(Utils.getFlag("distribution", options));
      }
      // -distribution flag needs -p option
      else {
        if (Utils.getFlag("distribution", options))
          throw new Exception("Cannot print distribution without '-p' option!");
      }


      // if no training file given, we don't have any priors
      if ( (!trainSetPresent) && (printComplexityStatistics) )
        throw new Exception("Cannot print complexity statistics ('-k') without training file ('-t')!");


      // If a model file is given, we can't process
      // scheme-specific options
      if (objectInputFileName.length() != 0) {
        Utils.checkForRemainingOptions(options);
      } else {


        // Set options for classifier
        if (classifier instanceof OptionHandler) {
          for (int i = 0; i < options.length; i++) {
            if (options[i].length() != 0) {
              if (schemeOptionsText == null) {
                schemeOptionsText = new StringBuffer();
              }
              if (options[i].indexOf(' ') != -1) {
                schemeOptionsText.append('"' + options[i] + "\" ");
              } else {
                schemeOptionsText.append(options[i] + " ");
              }
            }
          }
          ((OptionHandler)classifier).setOptions(options);
        }
      }


      Utils.checkForRemainingOptions(options);
    } catch (Exception e) {
      throw new Exception("\nWeka exception: " + e.getMessage()
          + makeOptionString(classifier, false));
    }


    if (objectInputFileName.length() != 0) {
      // Load classifier from file
      if (objectInputStream != null) {
        classifier = (Classifier) objectInputStream.readObject();
        // try and read a header (if present)
        Instances savedStructure = null;
        try {
          savedStructure = (Instances) objectInputStream.readObject();
        } catch (Exception ex) {
          // don't make a fuss
        }
        if (savedStructure != null) {
          // test for compatibility with template
          if (!template.equalHeaders(savedStructure)) {
            throw new Exception("training and test set are not compatible\n" + template.equalHeadersMsg(savedStructure));
          }
        }
        objectInputStream.close();
      }
      else if (xmlInputStream != null) {
        // whether KOML is available has already been checked (objectInputStream would null otherwise)!
        classifier = (Classifier) KOML.read(xmlInputStream);
        xmlInputStream.close();
      }
    }
    
    // Setup up evaluation objects
    Evaluation trainingEvaluation = new Evaluation(new Instances(template, 0), costMatrix);
    Evaluation testingEvaluation = new Evaluation(new Instances(template, 0), costMatrix);
    if (classifier instanceof weka.classifiers.misc.InputMappedClassifier) {
      Instances mappedClassifierHeader = 
        ((weka.classifiers.misc.InputMappedClassifier)classifier).
          getModelHeader(new Instances(template, 0));
            
      trainingEvaluation = new Evaluation(new Instances(mappedClassifierHeader, 0), costMatrix);
      testingEvaluation = new Evaluation(new Instances(mappedClassifierHeader, 0), costMatrix);
    }


    // disable use of priors if no training file given
    if (!trainSetPresent)
      testingEvaluation.useNoPriors();


    // backup of fully setup classifier for cross-validation
    classifierBackup = AbstractClassifier.makeCopy(classifier);


    // Build the classifier if no object file provided
    if ((classifier instanceof UpdateableClassifier) &&
        (testSetPresent || noCrossValidation) &&
        (costMatrix == null) &&
        (trainSetPresent)) {
      // Build classifier incrementally
      trainingEvaluation.setPriors(train);
      testingEvaluation.setPriors(train);
      trainTimeStart = System.currentTimeMillis();
      if (objectInputFileName.length() == 0) {
        classifier.buildClassifier(train);
      }
      Instance trainInst;
      while (trainSource.hasMoreElements(train)) {
        trainInst = trainSource.nextElement(train);
        trainingEvaluation.updatePriors(trainInst);
        testingEvaluation.updatePriors(trainInst);
        ((UpdateableClassifier)classifier).updateClassifier(trainInst);
      }
      trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;
    } else if (objectInputFileName.length() == 0) {
      // Build classifier in one go
      tempTrain = trainSource.getDataSet(actualClassIndex);
      
      if (classifier instanceof weka.classifiers.misc.InputMappedClassifier &&
          !trainingEvaluation.getHeader().equalHeaders(tempTrain)) {
        // we need to make a new dataset that maps the training instances to
        // the structure expected by the mapped classifier - this is only
        // to ensure that the structure and priors computed by the *testing*
        // evaluation object is correct with respect to the mapped classifier
        Instances mappedClassifierDataset = 
          ((weka.classifiers.misc.InputMappedClassifier)classifier).
            getModelHeader(new Instances(template, 0));
        for (int zz = 0; zz < tempTrain.numInstances(); zz++) {
          Instance mapped = ((weka.classifiers.misc.InputMappedClassifier)classifier).
            constructMappedInstance(tempTrain.instance(zz));
          mappedClassifierDataset.add(mapped);
        }
        tempTrain = mappedClassifierDataset;
      }
      
      trainingEvaluation.setPriors(tempTrain);
      testingEvaluation.setPriors(tempTrain);
      trainTimeStart = System.currentTimeMillis();
      classifier.buildClassifier(tempTrain);
      trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;
    }


    // backup of fully trained classifier for printing the classifications
    if (classificationOutput != null) {
      classifierClassifications = AbstractClassifier.makeCopy(classifier);
      if (classifier instanceof weka.classifiers.misc.InputMappedClassifier) {
        classificationOutput.setHeader(trainingEvaluation.getHeader());
      }
    }


    // Save the classifier if an object output file is provided
    if (objectOutputFileName.length() != 0) {
      OutputStream os = new FileOutputStream(objectOutputFileName);
      // binary
      if (!(objectOutputFileName.endsWith(".xml") || (objectOutputFileName.endsWith(".koml") && KOML.isPresent()))) {
        if (objectOutputFileName.endsWith(".gz")) {
          os = new GZIPOutputStream(os);
        }
        ObjectOutputStream objectOutputStream = new ObjectOutputStream(os);
        objectOutputStream.writeObject(classifier);
        if (template != null) {
          objectOutputStream.writeObject(template);
        }
        objectOutputStream.flush();
        objectOutputStream.close();
      }
      // KOML/XML
      else {
        BufferedOutputStream xmlOutputStream = new BufferedOutputStream(os);
        if (objectOutputFileName.endsWith(".xml")) {
          XMLSerialization xmlSerial = new XMLClassifier();
          xmlSerial.write(xmlOutputStream, classifier);
        }
        else
          // whether KOML is present has already been checked
          // if not present -> ".koml" is interpreted as binary - see above
          if (objectOutputFileName.endsWith(".koml")) {
            KOML.write(xmlOutputStream, classifier);
          }
        xmlOutputStream.close();
      }
    }


    // If classifier is drawable output string describing graph
    if ((classifier instanceof Drawable) && (printGraph)){
      return ((Drawable)classifier).graph();
    }


    // Output the classifier as equivalent source
    if ((classifier instanceof Sourcable) && (printSource)){
      return wekaStaticWrapper((Sourcable) classifier, sourceClass);
    }


    // Output model
    if (!(noOutput || printMargins)) {
      if (classifier instanceof OptionHandler) {
        if (schemeOptionsText != null) {
          text.append("\nOptions: "+schemeOptionsText);
          text.append("\n");
        }
      }
      text.append("\n" + classifier.toString() + "\n");
    }


    if (!printMargins && (costMatrix != null)) {
      text.append("\n=== Evaluation Cost Matrix ===\n\n");
      text.append(costMatrix.toString());
    }


    // Output test instance predictions only
    if (classificationOutput != null) {
      DataSource source = testSource;
      predsBuff = new StringBuffer();
      classificationOutput.setBuffer(predsBuff);
      // no test set -> use train set
      if (source == null && noCrossValidation) {
        source = trainSource;
        predsBuff.append("\n=== Predictions on training data ===\n\n");
      } else {
        predsBuff.append("\n=== Predictions on test data ===\n\n");
      }
      if (source != null)
        classificationOutput.print(classifierClassifications, source);
    }


    // Compute error estimate from training data
    if ((trainStatistics) && (trainSetPresent)) {


      if ((classifier instanceof UpdateableClassifier) &&
          (testSetPresent) &&
          (costMatrix == null)) {


        // Classifier was trained incrementally, so we have to
        // reset the source.
        trainSource.reset();


        // Incremental testing
        train = trainSource.getStructure(actualClassIndex);
        testTimeStart = System.currentTimeMillis();
        Instance trainInst;
        while (trainSource.hasMoreElements(train)) {
          trainInst = trainSource.nextElement(train);
          trainingEvaluation.evaluateModelOnce((Classifier)classifier, trainInst);
        }
        testTimeElapsed = System.currentTimeMillis() - testTimeStart;
      } else {
        testTimeStart = System.currentTimeMillis();
        trainingEvaluation.evaluateModel(
            classifier, trainSource.getDataSet(actualClassIndex));
        testTimeElapsed = System.currentTimeMillis() - testTimeStart;
      }


      // Print the results of the training evaluation
      if (printMargins) {
        return trainingEvaluation.toCumulativeMarginDistributionString();
      } else {
        if (classificationOutput == null) {
          text.append("\nTime taken to build model: "
              + Utils.doubleToString(trainTimeElapsed / 1000.0,2)
              + " seconds");


          if (splitPercentage > 0)
            text.append("\nTime taken to test model on training split: ");
          else
            text.append("\nTime taken to test model on training data: ");
          text.append(Utils.doubleToString(testTimeElapsed / 1000.0,2) + " seconds");


          if (splitPercentage > 0)
            text.append(trainingEvaluation.toSummaryString("\n\n=== Error on training"
                  + " split ===\n", printComplexityStatistics));
          else
            text.append(trainingEvaluation.toSummaryString("\n\n=== Error on training"
                  + " data ===\n", printComplexityStatistics));


          if (template.classAttribute().isNominal()) {
            if (classStatistics) {
              text.append("\n\n" + trainingEvaluation.toClassDetailsString());
            }
            if (!noCrossValidation)
              text.append("\n\n" + trainingEvaluation.toMatrixString());
          }
        }
      }
    }


    // Compute proper error estimates
    if (testSource != null) {
      // Testing is on the supplied test data
      testSource.reset();
      test = testSource.getStructure(test.classIndex());
      Instance testInst;
      while (testSource.hasMoreElements(test)) {        
        testInst = testSource.nextElement(test);
        testingEvaluation.evaluateModelOnceAndRecordPrediction(
            (Classifier)classifier, testInst);
      }


      if (splitPercentage > 0) {
        if (classificationOutput == null) {
          text.append("\n\n" + testingEvaluation.
              toSummaryString("=== Error on test split ===\n",
                  printComplexityStatistics));
        }
      } else {
        if (classificationOutput == null) {
          text.append("\n\n" + testingEvaluation.
              toSummaryString("=== Error on test data ===\n",
                  printComplexityStatistics));
        }
      }


    } else if (trainSource != null) {
      if (!noCrossValidation) {
        // Testing is via cross-validation on training data
        Random random = new Random(seed);
        // use untrained (!) classifier for cross-validation
        classifier = AbstractClassifier.makeCopy(classifierBackup);
        if (classificationOutput == null) {
          testingEvaluation.crossValidateModel(classifier,
                                               trainSource.getDataSet(actualClassIndex),
                                               folds, random);
          if (template.classAttribute().isNumeric()) {
            text.append("\n\n\n" + testingEvaluation.
                        toSummaryString("=== Cross-validation ===\n",
                                        printComplexityStatistics));
          } else {
            text.append("\n\n\n" + testingEvaluation.
                        toSummaryString("=== Stratified " +
                                        "cross-validation ===\n",
                                        printComplexityStatistics));
          }
        } else {
          predsBuff = new StringBuffer();
          classificationOutput.setBuffer(predsBuff);
          predsBuff.append("\n=== Predictions under cross-validation ===\n\n");
          testingEvaluation.crossValidateModel(classifier,
                                               trainSource.getDataSet(actualClassIndex),
                                               folds, random, classificationOutput);
        }
      }
    }
    if (template.classAttribute().isNominal()) {
      if (classStatistics && !noCrossValidation && (classificationOutput == null)) {
        text.append("\n\n" + testingEvaluation.toClassDetailsString());
      }
      if (!noCrossValidation && (classificationOutput == null))
        text.append("\n\n" + testingEvaluation.toMatrixString());


    }


    // predictions from cross-validation?
    if (predsBuff != null) {
      text.append("\n" + predsBuff);
    }


    if ((thresholdFile.length() != 0) && template.classAttribute().isNominal()) {
      int labelIndex = 0;
      if (thresholdLabel.length() != 0)
        labelIndex = template.classAttribute().indexOfValue(thresholdLabel);
      if (labelIndex == -1)
        throw new IllegalArgumentException(
            "Class label '" + thresholdLabel + "' is unknown!");
      ThresholdCurve tc = new ThresholdCurve();
      Instances result = tc.getCurve(testingEvaluation.predictions(), labelIndex);
      DataSink.write(thresholdFile, result);
    }


    return text.toString();
  }

View Full Code Here

Examples of weka.core.Instances

      if (i == m_Instances.classIndex())
  hv.addElement(predictedClass);
      hv.addElement(m_Instances.attribute(i).copy());
    }
    
    m_PlotInstances = new Instances(
  m_Instances.relationName() + "_predicted", hv, m_Instances.numInstances());
    m_PlotInstances.setClassIndex(m_ClassIndex + 1);
  }

View Full Code Here

Examples of weka.core.Instances

    int    num;
    int    i;
    int    n;
    FastVector  preds;
    FastVector  atts;
    Instances  data;
    Instance  inst;
    Instance  newInst;
    double[]  values;
    double[][]  predInt;
    
    // determine the maximum number of intervals
    maxNum = 0;
    preds  = m_Evaluation.predictions();
    for (i = 0; i < preds.size(); i++) {
      num = ((NumericPrediction) preds.elementAt(i)).predictionIntervals().length;
      if (num > maxNum)
  maxNum = num;
    }
    
    // create new header
    atts = new FastVector();
    for (i = 0; i < m_PlotInstances.numAttributes(); i++)
      atts.addElement(m_PlotInstances.attribute(i));
    for (i = 0; i < maxNum; i++) {
      atts.addElement(new Attribute("predictionInterval_" + (i+1) + "-lowerBoundary"));
      atts.addElement(new Attribute("predictionInterval_" + (i+1) + "-upperBoundary"));
      atts.addElement(new Attribute("predictionInterval_" + (i+1) + "-width"));
    }
    data = new Instances(m_PlotInstances.relationName(), atts, m_PlotInstances.numInstances());
    data.setClassIndex(m_PlotInstances.classIndex());
    
    // update data
    for (i = 0; i < m_PlotInstances.numInstances(); i++) {
      inst = m_PlotInstances.instance(i);
      // copy old values
      values = new double[data.numAttributes()];
      System.arraycopy(inst.toDoubleArray(), 0, values, 0, inst.numAttributes());
      // add interval data
      predInt = ((NumericPrediction) preds.elementAt(i)).predictionIntervals();
      for (n = 0; n < maxNum; n++) {
  if (n < predInt.length){
    values[m_PlotInstances.numAttributes() + n*3 + 0] = predInt[n][0];
    values[m_PlotInstances.numAttributes() + n*3 + 1] = predInt[n][1];
    values[m_PlotInstances.numAttributes() + n*3 + 2] = predInt[n][1] - predInt[n][0];
  }
  else {
    values[m_PlotInstances.numAttributes() + n*3 + 0] = Utils.missingValue();
    values[m_PlotInstances.numAttributes() + n*3 + 1] = Utils.missingValue();
    values[m_PlotInstances.numAttributes() + n*3 + 2] = Utils.missingValue();
  }
      }
      // create new Instance
      newInst = new DenseInstance(inst.weight(), values);
      data.add(newInst);
    }
    
    m_PlotInstances = data;
  }

View Full Code Here

Examples of weka.core.Instances

   * returns the instances of the panel, if none then NULL
   * 
   * @return    the instances of the panel
   */
  public Instances getInstances() {
    Instances            result;
    
    result = null;
    
    if (m_TableArff.getModel() != null)
      result = ((ArffSortedTableModel) m_TableArff.getModel()).getInstances();

View Full Code Here

0 1 2 3 4 5

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.