package edu.stanford.nlp.optimization;

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Set;

import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.math.ArrayMath;
import edu.stanford.nlp.util.Generics;


/**
*
* An implementation of L-BFGS for Quasi Newton unconstrained minimization.
*
* The general outline of the algorithm is taken from:
* <blockquote>
* <i>Numerical Optimization</i> (second edition) 2006
* Jorge Nocedal and Stephen J. Wright
* </blockquote>
* A variety of different options are available.
*
* <h3>LINESEARCHES</h3>
*
* BACKTRACKING: This routine
* simply starts with a guess of 1 for the step size. If the step doesn't
* supply a sufficient decrease in the function value, the step is updated
* through step = 0.1*step. This method is certainly simpler, but it doesn't allow
* for an increase in step size, and it isn't well suited to Quasi Newton methods.
*
* MINPACK: This routine is based on the implementation used in MINPACK.
* It finds a point satisfying the Wolfe conditions, which require that
* a point have a sufficiently smaller function value and a gradient of
* smaller magnitude. This provides enough to prove theoretically quadratic
* convergence. In order to find such a point, the linesearch first finds an
* interval which must contain a satisfying point, and then progressively
* reduces that interval using cubic or quadratic interpolation.
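*
* The linesearch can also be switched after construction; a minimal sketch
* using this class's own methods:
* <blockquote><code>
* QNMinimizer qn = new QNMinimizer(15);
* qn.useBacktracking();    // or qn.useMinPackSearch();
* </code></blockquote>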
*
* SCALING: L-BFGS allows the initial guess at the Hessian to be updated at each
* step. Standard BFGS does this by approximating the Hessian as a scaled
* identity matrix. To use this method set scaleOpt to SCALAR. A better way
* to approximate the Hessian is by using a diagonal scaling matrix, whose
* diagonal can then be updated as more information comes in. This method can be
* used by setting scaleOpt to DIAGONAL.
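*
* For example, a sketch of selecting the scaling at runtime with the methods
* defined below:
* <blockquote><code>
* qn.useDiagonalScaling();  // or qn.useScalarScaling();
* </code></blockquote>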
*
*
* CONVERGENCE: Previously, convergence was gauged by looking at the average
* decrease per step, dividing that by the current value, and terminating when
* that value became smaller than TOL. This method fails when the function
* value approaches zero, so two other convergence criteria are used. The first
* stores the initial gradient norm |g0|, then terminates when the new gradient
* norm, |g|, is sufficiently smaller: i.e., |g| &lt; eps*|g0|. The second checks whether
* |g| &lt; eps*max( 1 , |x| ), which essentially checks whether the gradient
* is numerically zero.
* Another convergence criterion is added where termination is triggered if no
* improvement is observed after X (set by terminateOnEvalImprovementNumOfEpoch)
* iterations over some validation test set, as evaluated by an Evaluator.
*
* Each of these convergence criteria can be turned on or off by setting the
* flags:
* <blockquote><code>
* private boolean useAveImprovement = true;
* private boolean useRelativeNorm = true;
* private boolean useNumericalZero = true;
* private boolean useEvalImprovement = false;
* </code></blockquote>
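*
* These flags are private, but each can be toggled through the corresponding
* public setter; a sketch:
* <blockquote><code>
* qn.terminateOnAverageImprovement(false);
* qn.terminateOnRelativeNorm(true);
* qn.terminateOnNumericalZero(true);
* qn.terminateOnEvalImprovement(true);
* qn.setTerminateOnEvalImprovementNumOfEpoch(10);
* </code></blockquote>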
*
* To use the QNMinimizer first construct it using
* <blockquote><code>
* QNMinimizer qn = new QNMinimizer(mem, true)
* </code></blockquote>
* mem - the number of previous estimate vector pairs to
* store; generally 15 is plenty. true - this tells the QNMinimizer to use the
* MINPACK linesearch with DIAGONAL scaling. false would lead to the use of the
* criteria used in the old QNMinimizer class.
*
* Then call:
* <blockquote><code>
* qn.minimize(dfunction,convergenceTolerance,initialGuess,maxFunctionEvaluations);
* </code></blockquote>
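*
* A more complete sketch (the DiffFunction and variable names here are
* illustrative; any DiffFunction implementation will do):
* <blockquote><code>
* DiffFunction f = ...; // provides valueAt(x) and derivativeAt(x)
* QNMinimizer qn = new QNMinimizer(15, true);
* qn.terminateOnMaxItr(200); // optional additional stopping criterion
* double[] initial = new double[f.domainDimension()];
* double[] minimum = qn.minimize(f, 1e-4, initial, -1); // -1 = no evaluation limit
* </code></blockquote>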
*
* @author akleeman
*/

public class QNMinimizer implements Minimizer<DiffFunction>, HasEvaluators {

  private int fevals = 0; // the number of function evaluations
  private int maxFevals = -1;
  private int mem = 10; // the number of s,y pairs to retain for BFGS
  private int its = 0; // the number of iterations
  private final Function monitor;
  private boolean quiet;
  private static final NumberFormat nf = new DecimalFormat("0.000E0");
  private static final NumberFormat nfsec = new DecimalFormat("0.00"); // for times
  private static final double ftol = 1e-4; // Linesearch parameters
  private double gtol = 0.9;
  private static final double aMin = 1e-12; // Min step size
  private static final double aMax = 1e12; // Max step size
  private static final double p66 = 0.66; // used to check getting more than 2/3 of width improvement
  private static final double p5 = 0.5; // Some other magic constant
  private static final int a = 0; // index of the step length (alpha) in point arrays
  private static final int f = 1; // index of the function value in point arrays
  private static final int g = 2; // index of the directional derivative in point arrays
  public boolean outputToFile = false;
  private boolean success = false;
  private boolean bracketed = false; // used for linesearch
  private QNInfo presetInfo = null;
  private boolean noHistory = true;

  // parameters for OWL-QN (L-BFGS with L1-regularization)
  private boolean useOWLQN = false;
  private double lambdaOWL = 0;

  private boolean useAveImprovement = true;
  private boolean useRelativeNorm = true;
  private boolean useNumericalZero = true;
  private boolean useEvalImprovement = false;
  private boolean useMaxItr = false;
  private int maxItr = 0;

  private boolean suppressTestPrompt = false;
  private int terminateOnEvalImprovementNumOfEpoch = 1;

  private int evaluateIters = 0;    // Evaluate every x iterations (0 = no evaluation)
  private int startEvaluateIters = 0; // starting evaluation after x iterations
  private Evaluator[] evaluators;  // separate set of evaluators to check how optimization is going

  public enum eState {
    TERMINATE_MAXEVALS, TERMINATE_RELATIVENORM, TERMINATE_GRADNORM, TERMINATE_AVERAGEIMPROVE, CONTINUE, TERMINATE_EVALIMPROVE, TERMINATE_MAXITR
  }

  public enum eLineSearch {
    BACKTRACK, MINPACK
  }

  public enum eScaling {
    DIAGONAL, SCALAR
  }

  eLineSearch lsOpt = eLineSearch.MINPACK;
  eScaling scaleOpt = eScaling.DIAGONAL;
  eState state = eState.CONTINUE;


  public QNMinimizer() {
    this((Function) null);
  }

  public QNMinimizer(int m) {
    this(null, m);
  }

  public QNMinimizer(int m, boolean useRobustOptions) {
    this(null, m, useRobustOptions);
  }

  public QNMinimizer(Function monitor) {
    this.monitor = monitor;
  }

  public QNMinimizer(Function monitor, int m) {
    this(monitor, m, false);
  }

  public QNMinimizer(Function monitor, int m, boolean useRobustOptions) {
    this.monitor = monitor;
    mem = m;
    if (useRobustOptions) {
      this.setRobustOptions();
    }
  }

  public QNMinimizer(FloatFunction monitor) {
    throw new UnsupportedOperationException("Doesn't support floats yet");
  }

  public void setOldOptions() {
    useAveImprovement = true;
    useRelativeNorm = false;
    useNumericalZero = false;
    lsOpt = eLineSearch.BACKTRACK;
    scaleOpt = eScaling.SCALAR;
  }

  public final void setRobustOptions() {
    useAveImprovement = true;
    useRelativeNorm = true;
    useNumericalZero = true;
    lsOpt = eLineSearch.MINPACK;
    scaleOpt = eScaling.DIAGONAL;
  }

  @Override
  public void setEvaluators(int iters, Evaluator[] evaluators) {
    this.evaluateIters = iters;
    this.evaluators = evaluators;
  }

  public void setEvaluators(int iters, int startEvaluateIters, Evaluator[] evaluators) {
    this.evaluateIters = iters;
    this.startEvaluateIters = startEvaluateIters;
    this.evaluators = evaluators;
  }
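  // A minimal sketch of wiring up evaluators (the Evaluator array is assumed
  // to be supplied by the caller): run the evaluators on the current point
  // every 5 iterations, starting from iteration 20.
  //
  //   Evaluator[] evals = ...;
  //   qn.setEvaluators(5, 20, evals);
  //   qn.terminateOnEvalImprovement(true); // optionally stop when the eval score stops improving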

  public void terminateOnRelativeNorm(boolean toTerminate) {
    useRelativeNorm = toTerminate;
  }

  public void terminateOnNumericalZero(boolean toTerminate) {
    useNumericalZero = toTerminate;
  }

  public void terminateOnAverageImprovement(boolean toTerminate) {
    useAveImprovement = toTerminate;
  }

  public void terminateOnEvalImprovement(boolean toTerminate) {
    useEvalImprovement = toTerminate;
  }

  public void terminateOnMaxItr(int maxItr) {
    if (maxItr > 0) {
      useMaxItr = true;
      this.maxItr = maxItr;
    }
  }

  public void suppressTestPrompt(boolean suppressTestPrompt) {
    this.suppressTestPrompt = suppressTestPrompt;
  }

  public void setTerminateOnEvalImprovementNumOfEpoch(int terminateOnEvalImprovementNumOfEpoch) {
    this.terminateOnEvalImprovementNumOfEpoch = terminateOnEvalImprovementNumOfEpoch;
  }

  public void useMinPackSearch() {
    lsOpt = eLineSearch.MINPACK;
  }

  public void useBacktracking() {
    lsOpt = eLineSearch.BACKTRACK;
  }

  public void useDiagonalScaling() {
    scaleOpt = eScaling.DIAGONAL;
  }

  public void useScalarScaling() {
    scaleOpt = eScaling.SCALAR;
  }

  public boolean wasSuccessful() {
    return success;
  }

  public void shutUp() {
    this.quiet = true;
  }

  public void setM(int m) {
    mem = m;
  }

  public static class SurpriseConvergence extends Throwable {

    private static final long serialVersionUID = 4290178321643529559L;

    public SurpriseConvergence(String s) {
      super(s);
    }
  }

  private static class MaxEvaluationsExceeded extends Throwable {

    private static final long serialVersionUID = 8044806163343218660L;

    public MaxEvaluationsExceeded(String s) {
      super(s);
    }
  }

  /**
   *
   * The Record class is used to collect information about the function value
   * over a series of iterations. This information is used to determine
   * convergence, and to (attempt to) ensure numerical errors are not an issue.
   * It can also be used for plotting the results of the optimization routine.
   *
   * @author akleeman
   */
  public class Record {
    // convergence options.
    // have average difference like before
    // zero gradient.

    // for convergence test
    private final List<Double> evals = new ArrayList<Double>();
    private final List<Double> values = new ArrayList<Double>();
    List<Double> gNorms = new ArrayList<Double>();
    // List<Double> xNorms = new ArrayList<Double>();
    private final List<Integer> funcEvals = new ArrayList<Integer>();
    private final List<Double> time = new ArrayList<Double>();
    // gNormInit: This makes it so that if for some reason
    // you try and divide by the initial norm before it's been
    // initialized you don't get a NAN but you will also never
    // get false convergence.
    private double gNormInit = Double.MIN_VALUE;
    private double relativeTOL = 1e-8;
    private double TOL = 1e-6;
    private double EPS = 1e-6;
    private long startTime;
    private double gNormLast; // This is used for convergence.
    private double[] xLast;
    private int maxSize = 100; // This will control the number of func values /
                                // gradients to retain.
    private Function mon = null;
    private boolean quiet = false;
    private boolean memoryConscious = true;
    private PrintWriter outputFile = null;

    private int noImproveItrCount = 0;
    private double[] xBest;

    public Record(boolean beQuiet, Function monitor, double tolerance) {
      this.quiet = beQuiet;
      this.mon = monitor;
      this.TOL = tolerance;
    }

    public Record(boolean beQuiet, Function monitor, double tolerance,
        PrintWriter output) {
      this.quiet = beQuiet;
      this.mon = monitor;
      this.TOL = tolerance;
      this.outputFile = output;
    }

    public Record(boolean beQuiet, Function monitor, double tolerance,
        double eps) {
      this.quiet = beQuiet;
      this.mon = monitor;
      this.TOL = tolerance;
      this.EPS = eps;
    }

    public void setEPS(double eps) {
      EPS = eps;
    }

    public void setTOL(double tolerance) {
      TOL = tolerance;
    }

    public void start(double val, double[] grad) {
      start(val, grad, null);
    }

    /*
     * Stops output to stdout.
     */
    public void shutUp() {
      this.quiet = true;
    }

    /*
     * Initialize the class, this starts the timer, and initiates the gradient
     * norm for use with convergence.
     */
    public void start(double val, double[] grad, double[] x) {
      startTime = System.currentTimeMillis();
      gNormInit = ArrayMath.norm(grad);
      xLast = x;
      writeToFile(1, val, gNormInit, 0.0);

      if (x != null) {
        monitorX(x);
      }
    }

    private void writeToFile(double fevals, double val, double gNorm,
        double time) {
      if (outputFile != null) {
        outputFile.println(fevals + "," + val + "," + gNorm + "," + time);
      }
    }

    public void add(double val, double[] grad, double[] x, int fevals, double evalScore) {

      if (!memoryConscious) {
        if (gNorms.size() > maxSize) {
          gNorms.remove(0);
        }
        if (time.size() > maxSize) {
          time.remove(0);
        }
        if (funcEvals.size() > maxSize) {
          funcEvals.remove(0);
        }
        gNorms.add(gNormLast);
        time.add(howLong());
        funcEvals.add(fevals);
      } else {
        maxSize = 10;
      }

      gNormLast = ArrayMath.norm(grad);
      if (values.size() > maxSize) {
        values.remove(0);
      }

      values.add(val);

      if (evalScore != Double.NEGATIVE_INFINITY)
        evals.add(evalScore);

      writeToFile(fevals, val, gNormLast, howLong());

      say(nf.format(val) + " " + nfsec.format(howLong()) + "s");

      xLast = x;
      monitorX(x);
    }

    public void monitorX(double[] x) {
      if (this.mon != null) {
        this.mon.valueAt(x);
      }
    }

    /**
     * This function checks for convergence through first
     * order optimality, numerical convergence (i.e., zero numerical
     * gradient), and also by checking the average improvement.
     *
     * @return A value of the enumeration type <code>eState</code> indicating
     *   whether the optimization routine should terminate, and if so why.
     */
    public eState toContinue() {

      double relNorm = gNormLast / gNormInit;
      int size = values.size();
      double newestVal = values.get(size - 1);
      double previousVal = (size >= 10 ? values.get(size - 10) : values.get(0));
      double averageImprovement = (previousVal - newestVal) / (size >= 10 ? 10 : size);
      int evalsSize = evals.size();

      if (useMaxItr && its >= maxItr)
        return eState.TERMINATE_MAXITR;

      if (useEvalImprovement) {
        int bestInd = -1;
        double bestScore = Double.NEGATIVE_INFINITY;
        for (int i = 0; i < evalsSize; i++) {
          if (evals.get(i) >= bestScore) {
            bestScore = evals.get(i);
            bestInd = i;
          }
        }
        if (bestInd == evalsSize-1) { // copy xBest
          if (xBest == null)
            xBest = Arrays.copyOf(xLast, xLast.length);
          else
            System.arraycopy( xLast, 0, xBest, 0, xLast.length );
        }
        if ((evalsSize - bestInd) >= terminateOnEvalImprovementNumOfEpoch)
          return eState.TERMINATE_EVALIMPROVE;
      }

      // This is used to be able to reproduce results that were trained on the
      // QNMinimizer before the convergence criteria were updated.
      if (useAveImprovement
          && (size > 5 && Math.abs(averageImprovement / newestVal) < TOL)) {
        return eState.TERMINATE_AVERAGEIMPROVE;
      }

      // Check to see if the gradient is sufficiently small
      if (useRelativeNorm && relNorm <= relativeTOL) {
        return eState.TERMINATE_RELATIVENORM;
      }

      if (useNumericalZero) {
        // This checks if the gradient is sufficiently small compared to x that
        // it is treated as zero.
        if (gNormLast < EPS * Math.max(1.0, ArrayMath.norm_1(xLast))) {
          // |g| < |x|_1
          // First we do the one norm, because that's easiest, and always bigger.
          if (gNormLast < EPS * Math.max(1.0, ArrayMath.norm(xLast))) {
            // |g| < max(1,|x|)
            // Now actually compare with the two norm if we have to.
            System.err
                .println("Gradient is numerically zero, stopped on machine epsilon.");
            return eState.TERMINATE_GRADNORM;
          }
        }
        // give user information about the norms.
      }

      say(" |" + nf.format(gNormLast) + "| {" + nf.format(relNorm) + "} "
            + nf.format(Math.abs(averageImprovement / newestVal)) + " " + (evalsSize > 0 ? evals.get(evalsSize-1).toString() : "-") + " ");
      return eState.CONTINUE;
    }

    /**
     *  Return the time in seconds since this class was created.
     *  @return The time in seconds since this class was created.
     */
    public double howLong() {
      return ((System.currentTimeMillis() - startTime)) / 1000.0;
    }

    public double[] getBest() {
      return xBest;
    }

  } // end class Record

  /**
   * The QNInfo class is used to store information about the Quasi Newton
   * update. It holds all the s,y pairs, updates the diagonal, and scales
   * everything as needed.
   */
  public class QNInfo {
    // Diagonal Options
    // Linesearch Options
    // Memory stuff
    private List<double[]> s = null;
    private List<double[]> y = null;
    private List<Double> rho = null;
    private double gamma;
    public double[] d = null;
    private int mem;
    private int maxMem = 20;
    public eScaling scaleOpt = eScaling.SCALAR;

    public QNInfo(int size) {
      s = new ArrayList<double[]>();
      y = new ArrayList<double[]>();
      rho = new ArrayList<Double>();
      gamma = 1;
      mem = size;
    }

    public QNInfo() {
      s = new ArrayList<double[]>();
      y = new ArrayList<double[]>();
      rho = new ArrayList<Double>();
      gamma = 1;
      mem = maxMem;
    }

    public QNInfo(List<double[]> sList, List<double[]> yList) {
      s = new ArrayList<double[]>();
      y = new ArrayList<double[]>();
      rho = new ArrayList<Double>();
      gamma = 1;
      setHistory(sList, yList);
    }

    public int size() {
      return s.size();
    }

    public double getRho(int ind) {
      return rho.get(ind);
    }

    public double[] getS(int ind) {
      return s.get(ind);
    }

    public double[] getY(int ind) {
      return y.get(ind);
    }

    public void useDiagonalScaling() {
      this.scaleOpt = eScaling.DIAGONAL;
    }

    public void useScalarScaling() {
      this.scaleOpt = eScaling.SCALAR;
    }

    /*
     * Free up that memory.
     */
    public void free() {
      s = null;
      y = null;
      rho = null;
      d = null;
    }

    public void clear() {
      s.clear();
      y.clear();
      rho.clear();
      d = null;
    }

    public void setHistory(List<double[]> sList, List<double[]> yList) {
      int size = sList.size();

      for (int i = 0; i < size; i++) {
        update(sList.get(i), yList.get(i), ArrayMath.innerProduct(yList.get(i),
            yList.get(i)), ArrayMath.innerProduct(sList.get(i), yList.get(i)),
            0, 1.0);
      }
    }

    /*
     * applyInitialHessian(double[] x)
     *
     * This function takes the vector x, and applies the best guess at the
     * initial hessian to this vector, based on available information from
     * previous updates.
     */
    public double[] applyInitialHessian(double[] x) {

      switch (scaleOpt) {
      case SCALAR:
        say("I");
        ArrayMath.multiplyInPlace(x, gamma);
        break;
      case DIAGONAL:
        say("D");
        if (d != null) {
          // Check sizes
          if (x.length != d.length) {
            throw new IllegalArgumentException("Vector of incorrect size passed to applyInitialHessian in QNInfo class");
          }
          // Scale element-wise
          for (int i = 0; i < x.length; i++) {
            x[i] = x[i] / (d[i]);
          }
        }
        break;
      }

      return x;

    }

    /*
     * The update function is used to update the hessian approximation used by
     * the quasi newton optimization routine.
     *
     * If everything has behaved nicely, this involves deciding on a new initial
     * hessian through scaling or a diagonal update, and then storing the
     * secant pair s = x - previousX and y = grad - previousGrad.
     *
     * Things can go wrong: if any non-convex behavior is detected (s^T y < 0),
     * or if numerical errors are likely, the update is skipped.
     */
    public int update(double[] newX, double[] x, double[] newGrad,
        double[] grad, double step) throws SurpriseConvergence {
      // todo: add outofmemory error.
      double[] newS, newY;
      double sy, yy, sg;

      // allocate arrays for new s,y pairs (or replace if the list is already
      // full)
      if (mem > 0 && s.size() == mem || s.size() == maxMem) {
        newS = s.remove(0);
        newY = y.remove(0);
        rho.remove(0);
      } else {
        newS = new double[x.length];
        newY = new double[x.length];
      }

      // Here we construct the new pairs, and check for positive definiteness.
      sy = 0;
      yy = 0;
      sg = 0;
      for (int i = 0; i < x.length; i++) {
        newS[i] = newX[i] - x[i];
        newY[i] = newGrad[i] - grad[i];
        sy += newS[i] * newY[i];
        yy += newY[i] * newY[i];
        sg += newS[i] * newGrad[i];
      }

      // Apply the updates used for the initial hessian.

      return update(newS, newY, yy, sy, sg, step);
    }

    private class NegativeCurvature extends Throwable {
      /**
       *
       */
      private static final long serialVersionUID = 4676562552506850519L;

      public NegativeCurvature() {
      }
    }

    private class ZeroGradient extends Throwable {
      /**
       *
       */
      private static final long serialVersionUID = -4001834044987928521L;

      public ZeroGradient() {
      }
    }

    public int update(double[] newS, double[] newY, double yy, double sy,
        double sg, double step) {

      // Initialize diagonal to the identity
      if (scaleOpt == eScaling.DIAGONAL && d == null) {
        d = new double[newS.length];
        for (int i = 0; i < d.length; i++) {
          d[i] = 1.0;
        }
      }

      try {

        if (sy < 0) {
          throw new NegativeCurvature();
        }

        if (yy == 0.0) {
          throw new ZeroGradient();
        }

        switch (scaleOpt) {
        /*
         * SCALAR: The standard L-BFGS initial approximation which is just a
         * scaled identity.
         */
        case SCALAR:
          gamma = sy / yy;
          break;
        /*
         * DIAGONAL: A diagonal scaling matrix is used as the initial
         * approximation. The updating method used here is due to Andrew
         * Bradley of the ICME dept.
         */
        case DIAGONAL:

          double sDs;
          // Gamma is designed to scale such that a step length of one is
          // generally accepted.
          gamma = sy / (step * (sy - sg));
          sDs = 0.0;
          for (int i = 0; i < d.length; i++) {
            d[i] = gamma * d[i];
            sDs += newS[i] * d[i] * newS[i];
          }
          // This diagonal update was introduced by Andrew Bradley
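          // i.e., d_i <- (1 - d_i * s_i^2 / (s^T D s)) * d_i + y_i^2 / (s^T y)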
          for (int i = 0; i < d.length; i++) {
            d[i] = (1 - d[i] * newS[i] * newS[i] / sDs) * d[i] + newY[i]
                * newY[i] / sy;
          }
          // Here we make sure that the diagonal is alright
          double minD = ArrayMath.min(d);
          double maxD = ArrayMath.max(d);

          // If things have gone bad, just fill with the SCALAR approx.
          if (minD <= 0 || Double.isInfinite(maxD) || maxD / minD > 1e12) {
            System.err
                .println("QNInfo:update() : PROBLEM WITH DIAGONAL UPDATE");
            double fill = yy / sy;
            for (int i = 0; i < d.length; i++) {
              d[i] = fill;
            }
          }

        }

        // If s is already of size mem, remove the oldest vector and free it up.

        if (mem > 0 && s.size() == mem || s.size() == maxMem) {
          s.remove(0);
          y.remove(0);
          rho.remove(0);
        }

        // Actually add the pair.
        s.add(newS);
        y.add(newY);
        rho.add(1 / sy);

      } catch (NegativeCurvature nc) {
        // NOTE: if applying QNMinimizer to a non convex problem, we would still
        // like to update the matrix
        // or we could get stuck in a series of skipped updates.
        say(" Negative curvature detected, update skipped ");
      } catch (ZeroGradient zg) {
        say(" Either convergence, or floating point errors combined with extremely linear region ");
      }

      return s.size();
    } // end update

  } // end class QNInfo

  public void setHistory(List<double[]> s, List<double[]> y) {
    presetInfo = new QNInfo(s, y);
  }

  /*
   * computeDir()
   *
   * This function calculates the search direction by applying an approximation
   * of the inverse hessian (built from the stored s,y vector pairs) to the
   * gradient, using the standard L-BFGS two-loop recursion (BFGS update).
   */

  private void computeDir(double[] dir, double[] fg, double[] x, QNInfo qn, Function func)
      throws SurpriseConvergence {
    System.arraycopy(fg, 0, dir, 0, fg.length);

    int mmm = qn.size();
    double[] as = new double[mmm];

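    // First loop of the two-loop recursion: walk backwards through the stored
    // s,y pairs, computing as[i] = rho_i * s_i^T dir and updating dir -= as[i] * y_i.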
    for (int i = mmm - 1; i >= 0; i--) {
      as[i] = qn.getRho(i) * ArrayMath.innerProduct(qn.getS(i), dir);
      plusAndConstMult(dir, qn.getY(i), -as[i], dir);
    }

    // multiply by hessian approximation
    qn.applyInitialHessian(dir);

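    // Second loop: walk forwards, computing b = rho_i * y_i^T dir and
    // updating dir += (as[i] - b) * s_i; dir now approximates H^-1 * grad.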
    for (int i = 0; i < mmm; i++) {
      double b = qn.getRho(i) * ArrayMath.innerProduct(qn.getY(i), dir);
      plusAndConstMult(dir, qn.getS(i), as[i] - b, dir);
    }

    ArrayMath.multiplyInPlace(dir, -1);

    if (useOWLQN) { // step (2) in Galen & Gao 2007
      constrainSearchDir(dir, fg, x, func);
    }
  }

  // computes d = a + b * c
  private static double[] plusAndConstMult(double[] a, double[] b, double c,
      double[] d) {
    for (int i = 0; i < a.length; i++) {
      d[i] = a[i] + c * b[i];
    }
    return d;
  }

  private double doEvaluation(double[] x) {
    // Evaluate solution
    if (evaluators == null) return Double.NEGATIVE_INFINITY;
    double score = 0;
    for (Evaluator eval:evaluators) {
      if (!suppressTestPrompt)
        say("  Evaluating: " + eval.toString());
      score = eval.evaluate(x);
    }
    return score;
  }

  public float[] minimize(DiffFloatFunction function, float functionTolerance,
      float[] initial) {
    throw new UnsupportedOperationException("Float not yet supported for QN");
  }

  @Override
  public double[] minimize(DiffFunction function, double functionTolerance,
      double[] initial) {
    return minimize(function, functionTolerance, initial, -1);
  }

  @Override
  public double[] minimize(DiffFunction dfunction, double functionTolerance,
      double[] initial, int maxFunctionEvaluations) {
    return minimize(dfunction, functionTolerance, initial,
        maxFunctionEvaluations, null);
  }

  public double[] minimize(DiffFunction dfunction, double functionTolerance,
      double[] initial, int maxFunctionEvaluations, QNInfo qn) {

    say("QNMinimizer called on double function of "
        + dfunction.domainDimension() + " variables,");
    if (mem > 0) {
      sayln(" using M = " + mem + ".");
    } else {
      sayln(" using dynamic setting of M.");
    }

    if (qn == null && presetInfo == null) {
      qn = new QNInfo(mem);
      noHistory = true;
    } else if (presetInfo != null) {
      qn = presetInfo;
      noHistory = false;
    } else if (qn != null) {
      noHistory = false;
    }

    double[] x, newX, grad, newGrad, dir;
    double value;
    its = 0;
    fevals = 0;
    success = false;

    qn.scaleOpt = scaleOpt;

    // initialize weights
    x = initial;

    // initialize gradient
    grad = new double[x.length];
    newGrad = new double[x.length];
    newX = new double[x.length];
    dir = new double[x.length];

    // initialize function value and gradient (gradient is stored in grad inside
    // evaluateFunction)
    value = evaluateFunction(dfunction, x, grad);
    if (useOWLQN) {
      double norm = l1NormOWL(x, dfunction);
      value += norm * lambdaOWL;
      grad = pseudoGradientOWL(x, grad, dfunction); // step (1) in Galen & Gao except we are not computing v yet
    }

    PrintWriter outFile = null;
    PrintWriter infoFile = null;

    if (outputToFile) {
      try {
        String baseName = "QN_m" + mem + "_" + lsOpt.toString() + "_"
            + scaleOpt.toString();
        outFile = new PrintWriter(new FileOutputStream(baseName + ".output"),
            true);
        infoFile = new PrintWriter(new FileOutputStream(baseName + ".info"),
            true);
        infoFile.println(dfunction.domainDimension() + "; DomainDimension ");
        infoFile.println(mem + "; memory");
      } catch (IOException e) {
        throw new RuntimeIOException("Caught IOException outputting QN data to file", e);
      }
    }

    Record rec = new Record(quiet, monitor, functionTolerance, outFile);
    // sets the original gradient and x. Also stores the monitor.
    rec.start(value, grad, x);

    // Check if max Evaluations and Iterations have been provided.
    maxFevals = (maxFunctionEvaluations > 0) ? maxFunctionEvaluations
        : Integer.MAX_VALUE;
    // maxIterations = (maxIterations > 0) ? maxIterations : Integer.MAX_VALUE;

    sayln("               An explanation of the output:");
    sayln("Iter           The number of iterations");
    sayln("evals          The number of function evaluations");
    sayln("SCALING        <D> Diagonal scaling was used; <I> Scaled Identity");
    sayln("LINESEARCH     [## M steplength]  Minpack linesearch");
    sayln("                   1-Function value was too high");
    sayln("                   2-Value ok, gradient positive, positive curvature");
    sayln("                   3-Value ok, gradient negative, positive curvature");
    sayln("                   4-Value ok, gradient negative, negative curvature");
    sayln("               [.. B]  Backtracking");
    sayln("VALUE          The current function value");
    sayln("TIME           Total elapsed time");
    sayln("|GNORM|        The current norm of the gradient");
    sayln("{RELNORM}      The ratio of the current to initial gradient norms");
    sayln("AVEIMPROVE     The average improvement / current value");
    sayln("EVALSCORE      The last available eval score");
    sayln();
    sayln("Iter ## evals ## <SCALING> [LINESEARCH] VALUE TIME |GNORM| {RELNORM} AVEIMPROVE EVALSCORE");

    // Beginning of the loop.
    do {
      try {
        sayln();
        boolean doEval = (its >= 0 && its >= startEvaluateIters && evaluateIters > 0 && its % evaluateIters == 0);
        its += 1;
        double newValue;
        double[] newPoint = new double[3]; // initialized in loop
        say("Iter " + its + " evals " + fevals + " ");

        // Compute the search direction
        say("<");
        computeDir(dir, grad, x, qn, dfunction);
        say("> ");

        // sanity check dir
        boolean hasNaNDir = false;
        boolean hasNaNGrad = false;
        for (int i = 0; i < dir.length; i++) {
          if (dir[i] != dir[i]) hasNaNDir = true;
          if (grad[i] != grad[i]) hasNaNGrad = true;
        }
        if (hasNaNDir && !hasNaNGrad) {
          say("(NaN dir likely due to Hessian approx - resetting) ");
          qn.clear();
          // re-compute the search direction
          say("<");
          computeDir(dir, grad, x, qn, dfunction);
          say("> ");
        }

        // perform line search
        say("[");

        if (useOWLQN) {
          // only linear search is allowed for OWL-QN
          newPoint = lineSearchBacktrackOWL(dfunction, dir, x, newX, grad, value);
          say("B");
        } else {
          // switch between line search options.
          switch (lsOpt) {
          case BACKTRACK:
            newPoint = lineSearchBacktrack(dfunction, dir, x, newX, grad, value);
            say("B");
            break;
          case MINPACK:
            newPoint = lineSearchMinPack(dfunction, dir, x, newX, grad, value,
                functionTolerance);
            say("M");
            break;
          default:
            sayln("Invalid line search option for QNMinimizer. ");
            System.exit(1);
            break;

          }
        }

        newValue = newPoint[f];
        System.err.print(" " + nf.format(newPoint[a]));
        say("] ");

        // This shouldn't actually evaluate anything since that should have been
        // done in the lineSearch.
        System.arraycopy(dfunction.derivativeAt(newX), 0, newGrad, 0, newGrad.length);

        // This is where all the s, y updates are applied.
        qn.update(newX, x, newGrad, grad, newPoint[a]); // step (4) in Galen & Gao 2007

        if (useOWLQN) {
          // pseudo gradient
          newGrad = pseudoGradientOWL(newX, newGrad, dfunction);
        }

        double evalScore = Double.NEGATIVE_INFINITY;
        if (doEval) {
          evalScore = doEvaluation(newX);
        }

        // Add the current value and gradient to the records, this also monitors
        // X and writes to output
        rec.add(newValue, newGrad, newX, fevals, evalScore);

        // shift
        value = newValue;
        // double[] temp = x;
        // x = newX;
        // newX = temp;
        System.arraycopy(newX, 0, x, 0, x.length);
        System.arraycopy(newGrad, 0, grad, 0, newGrad.length);

        if (quiet) {
          System.err.print(".");
        }
        if (fevals > maxFevals) {
          throw new MaxEvaluationsExceeded(" Exceeded in minimize() loop ");
        }

      } catch (SurpriseConvergence s) {
        sayln();
        sayln("QNMinimizer aborted due to surprise convergence");
        break;
      } catch (MaxEvaluationsExceeded m) {
        sayln();
        sayln("QNMinimizer aborted due to maximum number of function evaluations");
        sayln(m.toString());
        sayln("** This is not an acceptable termination of QNMinimizer, consider");
        sayln("** increasing the max number of evaluations, or safeguarding your");
        sayln("** program by checking the QNMinimizer.wasSuccessful() method.");
        break;
      } catch (OutOfMemoryError oome) {
        sayln();
        if ( ! qn.s.isEmpty()) {
          qn.s.remove(0);
          qn.y.remove(0);
          qn.rho.remove(0);
          qn.mem = qn.s.size();
          System.err.println("Caught OutOfMemoryError, changing m = " + qn.mem);
        } else {
          throw oome;
        }
      }

    } while ((state = rec.toContinue()) == eState.CONTINUE); // do

    if (evaluateIters > 0) {
      // do final evaluation
      double evalScore = (useEvalImprovement ? doEvaluation(rec.getBest()) : doEvaluation(x));
      sayln("final evalScore is: " + evalScore);
    }

    //
    // Announce the reason minimization has terminated.
    //
    System.err.println();
    switch (state) {
    case TERMINATE_GRADNORM:
      System.err
          .println("QNMinimizer terminated due to numerically zero gradient: |g| < EPS  max(1,|x|) ");
      success = true;
      break;
    case TERMINATE_RELATIVENORM:
      System.err
          .println("QNMinimizer terminated due to sufficient decrease in gradient norms: |g|/|g0| < TOL ");
      success = true;
      break;
    case TERMINATE_AVERAGEIMPROVE:
      System.err
          .println("QNMinimizer terminated due to average improvement: | newest_val - previous_val | / |newestVal| < TOL ");
      success = true;
      break;
    case TERMINATE_MAXITR:
      System.err
          .println("QNMinimizer terminated due to reached max iteration " + maxItr );
      success = true;
      break;
    case TERMINATE_EVALIMPROVE:
      System.err
          .println("QNMinimizer terminated due to no improvement on eval ");
      success = true;
      x = rec.getBest();
      break;
    default:
      System.err.println("QNMinimizer terminated without converging");
      success = false;
      break;
    }

    double completionTime = rec.howLong();
    sayln("Total time spent in optimization: " + nfsec.format(completionTime) + "s");

    if (outputToFile) {
      infoFile.println(completionTime + "; Total Time ");
      infoFile.println(fevals + "; Total evaluations");
      infoFile.close();
      outFile.close();
    }

    qn.free();
    return x;

  } // end minimize()



  private void sayln() {
    if (!quiet) {
      System.err.println();
    }
  }

  private void sayln(String s) {
    if (!quiet) {
      System.err.println(s);
    }
  }

  private void say(String s) {
    if (!quiet) {
      System.err.print(s);
    }
  }

  // todo [cdm 2013]: Can this be sped up by returning a Pair rather than copying array?
  private double evaluateFunction(DiffFunction dfunc, double[] x, double[] grad) {
    System.arraycopy(dfunc.derivativeAt(x), 0, grad, 0, grad.length);
    fevals += 1;
    return dfunc.valueAt(x);
  }

  public void useOWLQN(boolean use, double lambda) {
    this.useOWLQN = use;
    this.lambdaOWL = lambda;
  }
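  // A minimal sketch of enabling OWL-QN (L1-regularized L-BFGS); the lambda
  // value below is purely illustrative:
  //
  //   QNMinimizer qn = new QNMinimizer(15, true);
  //   qn.useOWLQN(true, 1.0);  // adds lambda * ||x||_1 to the objective
  //   double[] minimum = qn.minimize(dfunction, 1e-4, initial);
  //
  // When OWL-QN is enabled, the backtracking OWL linesearch is always used in
  // minimize(), regardless of the lsOpt setting.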

  private static Set<Integer> initializeParamRange(Function func, double[] x) {
    Set<Integer> paramRange;
    if (func instanceof HasRegularizerParamRange) {
      paramRange = ((HasRegularizerParamRange)func).getRegularizerParamRange(x);
    } else {
      paramRange = Generics.newHashSet(x.length);
      for (int i = 0; i < x.length; i++) {
        paramRange.add(i);
      }
    }
    return paramRange;
  }

  private static double[] projectOWL(double[] x, double[] orthant, Function func) {
    Set<Integer> paramRange = initializeParamRange(func, x);
    for (int i : paramRange) {
      if (x[i] * orthant[i] <= 0)
        x[i] = 0;
    }
    return x;
  }

  private static double l1NormOWL(double[] x, Function func) {
    Set<Integer> paramRange = initializeParamRange(func, x);
    double sum = 0.0;
    for (int i: paramRange) {
      sum += Math.abs(x[i]);
    }
    return sum;
  }

  private static void constrainSearchDir(double[] dir, double[] fg, double[] x, Function func) {
    Set<Integer> paramRange = initializeParamRange(func, x);
    for (int i: paramRange) {
      if (dir[i] * fg[i] >= 0.0) {
        dir[i] = 0.0;
      }
    }
  }

  private double[] pseudoGradientOWL(double[] x, double[] grad, Function func) {
    Set<Integer> paramRange = initializeParamRange(func, x); // initialized below
    double[] newGrad = new double[grad.length];

    // compute pseudo gradient
    for (int i = 0; i < x.length; i++) {
      if (paramRange.contains(i)) {
        if (x[i] < 0.0) {
          // Differentiable
          newGrad[i] = grad[i] - lambdaOWL;
        } else if (x[i] > 0.0) {
          // Differentiable
          newGrad[i] = grad[i] + lambdaOWL;
        } else {
          if (grad[i] < -lambdaOWL) {
            // Take the right partial derivative
            newGrad[i] = grad[i] + lambdaOWL;
          } else if (grad[i] > lambdaOWL) {
            // Take the left partial derivative
            newGrad[i] = grad[i] - lambdaOWL;
          } else {
            newGrad[i] = 0.0;
          }
        }
      } else {
        newGrad[i] = grad[i];
      }
    }

    return newGrad;
  }


  /*
   * lineSearchBacktrackOWL is the linesearch used for L1 regularization.
   * it only satisfies sufficient descent not the Wolfe conditions.
   */
  private double[] lineSearchBacktrackOWL(Function func, double[] dir, double[] x,
      double[] newX, double[] grad, double lastValue)
      throws MaxEvaluationsExceeded {

    /* Choose the orthant for the new point. */
    double[] orthant = new double[x.length];
    for (int i = 0; i < orthant.length; i++) {
      orthant[i] = (x[i] == 0.0) ? -grad[i] : x[i];
    }

    // c1 can be anything between 0 and 1, exclusive (usu. 1/10 - 1/2)
    double step, c1;

    // for first few steps, we have less confidence in our initial step-size a
    // so scale back quicker
    if (its <= 2) {
      step = 0.1;
      c1 = 0.1;
    } else {
      step = 1.0;
      c1 = 0.1;
    }

    // should be small e.g. 10^-5 ... 10^-1
    double c = 0.01;

    // c = c * normGradInDir;

    double[] newPoint = new double[3];

    while (true) {
      plusAndConstMult(x, dir, step, newX);

      // The current point is projected onto the orthant
      projectOWL(newX, orthant, func); // step (3) in Galen & Gao 2007

      // Evaluate the function and gradient values
      double value  =  func.valueAt(newX);

      // Compute the L1 norm of the variables and add it to the object value
      double norm = l1NormOWL(newX, func);
      value += norm * lambdaOWL;

      newPoint[f] = value;

      double dgtest = 0.0;
      for (int i = 0;i < x.length ;i++) {
        dgtest += (newX[i] - x[i]) * grad[i];
      }

      if (newPoint[f] <= lastValue + c * dgtest)
        break;
      else {
        if (newPoint[f] < lastValue) {
          // an improvement, but not good enough... suspicious!
          say("!");
        } else {
          say(".");
        }
      }

      step = c1 * step;
    }

    newPoint[a] = step;
    fevals += 1;
    if (fevals > maxFevals) {
      throw new MaxEvaluationsExceeded(
          " Exceeded during linesearch() Function ");
    }

    return newPoint;
  }


  /*
   * lineSearchBacktrack is the original linesearch used for the first version
   * of QNMinimizer. it only satisfies sufficient descent not the Wolfe
   * conditions.
   */
  private double[] lineSearchBacktrack(Function func, double[] dir, double[] x,
      double[] newX, double[] grad, double lastValue)
      throws MaxEvaluationsExceeded {

    double normGradInDir = ArrayMath.innerProduct(dir, grad);
    say("(" + nf.format(normGradInDir) + ")");
    if (normGradInDir > 0) {
      say("{WARNING--- direction of positive gradient chosen!}");
    }

    // c1 can be anything between 0 and 1, exclusive (usu. 1/10 - 1/2)
    double step, c1;

    // for first few steps, we have less confidence in our initial step-size a
    // so scale back quicker
    if (its <= 2) {
      step = 0.1;
      c1 = 0.1;
    } else {
      step = 1.0;
      c1 = 0.1;
    }

    // should be small e.g. 10^-5 ... 10^-1
    double c = 0.01;

    // double v = func.valueAt(x);
    // c = c * mult(grad, dir);
    c = c * normGradInDir;

    double[] newPoint = new double[3];

    while ((newPoint[f] = func.valueAt((plusAndConstMult(x, dir, step, newX)))) > lastValue
        + c * step) {
      fevals += 1;
      if (newPoint[f] < lastValue) {
        // an improvement, but not good enough... suspicious!
        say("!");
      } else {
        say(".");
      }
      step = c1 * step;
    }

    newPoint[a] = step;
    fevals += 1;
    if (fevals > maxFevals) {
      throw new MaxEvaluationsExceeded(
          " Exceeded during linesearch() Function ");
    }

    return newPoint;
  }

  private double[] lineSearchMinPack(DiffFunction dfunc, double[] dir,
      double[] x, double[] newX, double[] grad, double f0, double tol)
      throws MaxEvaluationsExceeded {
    double xtrapf = 4.0;
    int info = 0;
    int infoc = 1;
    bracketed = false;
    boolean stage1 = true;
    double width = aMax - aMin;
    double width1 = 2 * width;
    // double[] wa = x;

    // Should check input parameters

    double g0 = ArrayMath.innerProduct(grad, dir);
    if (g0 >= 0) {
      // We're looking in a direction of positive gradient. This won't work.
      // set dir = -grad
      for (int i = 0; i < x.length; i++) {
        dir[i] = -grad[i];
      }
      g0 = ArrayMath.innerProduct(grad, dir);
    }
    double gTest = ftol * g0;

    double[] newPt = new double[3];
    double[] bestPt = new double[3];
    double[] endPt = new double[3];

    newPt[a] = 1.0; // Always guess 1 first, this should be right if the
                    // function is "nice" and BFGS is working.

    if (its == 1 && noHistory) {
      newPt[a] = 1e-1;
    }

    bestPt[a] = 0.0;
    bestPt[f] = f0;
    bestPt[g] = g0;
    endPt[a] = 0.0;
    endPt[f] = f0;
    endPt[g] = g0;

    // int cnt = 0;

    do {

      double stpMin; // = aMin; [cdm: this initialization was always overridden below]
      double stpMax; // = aMax; [cdm: this initialization was always overridden below]
      if (bracketed) {
        stpMin = Math.min(bestPt[a], endPt[a]);
        stpMax = Math.max(bestPt[a], endPt[a]);
      } else {
        stpMin = bestPt[a];
        stpMax = newPt[a] + xtrapf * (newPt[a] - bestPt[a]);
      }

      newPt[a] = Math.max(newPt[a], aMin);
      newPt[a] = Math.min(newPt[a], aMax);

      // Use the best point if we have some sort of strange termination
      // conditions.
      if ((bracketed && (newPt[a] <= stpMin || newPt[a] >= stpMax))
          || fevals >= maxFevals || infoc == 0
          || (bracketed && stpMax - stpMin <= tol * stpMax)) {
        // todo: below..
        plusAndConstMult(x, dir, bestPt[a], newX);
        newPt[f] = bestPt[f];
        newPt[a] = bestPt[a];
      }

      newPt[f] = dfunc.valueAt((plusAndConstMult(x, dir, newPt[a], newX)));
      newPt[g] = ArrayMath.innerProduct(dfunc.derivativeAt(newX), dir);
      double fTest = f0 + newPt[a] * gTest;
      fevals += 1;

      // Check and make sure everything is normal.
      if ((bracketed && (newPt[a] <= stpMin || newPt[a] >= stpMax))
          || infoc == 0) {
        info = 6;
        say(" line search failure: bracketed but no feasible found ");
      }
      if (newPt[a] == aMax && newPt[f] <= fTest && newPt[g] <= gTest) {
        info = 5;
        say(" line search failure: sufficient decrease, but gradient is more negative ");
      }
      if (newPt[a] == aMin && (newPt[f] > fTest || newPt[g] >= gTest)) {
        info = 4;
        say(" line search failure: minimum step length reached ");
      }
      if (fevals >= maxFevals) {
        info = 3;
        throw new MaxEvaluationsExceeded(
            " Exceeded during lineSearchMinPack() Function ");
      }
      if (bracketed && stpMax - stpMin <= tol * stpMax) {
        info = 2;
        say(" line search failure: interval is too small ");
      }
      if (newPt[f] <= fTest && Math.abs(newPt[g]) <= -gtol * g0) {
        info = 1;
      }

      if (info != 0) {
        return newPt;
      }

      // this is the first stage where we look for a point that is lower and
      // increasing

      if (stage1 && newPt[f] <= fTest && newPt[g] >= Math.min(ftol, gtol) * g0) {
        stage1 = false;
      }

      // A modified function is used to predict the step only if
      // we have not obtained a step for which the modified
      // function has a non-positive function value and non-negative
      // derivative, and if a lower function value has been
      // obtained but the decrease is not sufficient.

      if (stage1 && newPt[f] <= bestPt[f] && newPt[f] > fTest) {
        newPt[f] = newPt[f] - newPt[a] * gTest;
        bestPt[f] = bestPt[f] - bestPt[a] * gTest;
        endPt[f] = endPt[f] - endPt[a] * gTest;

        newPt[g] = newPt[g] - gTest;
        bestPt[g] = bestPt[g] - gTest;
        endPt[g] = endPt[g] - gTest;

        infoc = getStep(/* x, dir, newX, f0, g0, */
                        newPt, bestPt, endPt, stpMin, stpMax);

        bestPt[f] = bestPt[f] + bestPt[a] * gTest;
        endPt[f] = endPt[f] + endPt[a] * gTest;

        bestPt[g] = bestPt[g] + gTest;
        endPt[g] = endPt[g] + gTest;
      } else {
        infoc = getStep(/* x, dir, newX, f0, g0, */
                        newPt, bestPt, endPt, stpMin, stpMax);
      }

      if (bracketed) {
        if (Math.abs(endPt[a] - bestPt[a]) >= p66 * width1) {
          newPt[a] = bestPt[a] + p5 * (endPt[a] - bestPt[a]);
        }
        width1 = width;
        width = Math.abs(endPt[a] - bestPt[a]);
      }

    } while (true);
  }


  /**
   * getStep()
   *
   * THIS FUNCTION IS A TRANSLATION OF A TRANSLATION OF THE MINPACK SUBROUTINE
   * cstep(). Dianne O'Leary July 1991
   *
   * It was then interpreted from the implementation supplied by Andrew
   * Bradley. Modifications have been made for this particular application.
   *
   * This function is used to find a new safe guarded step to be used for
   * line search procedures.
   *
   */
  private int getStep(
        /* double[] x, double[] dir, double[] newX, double f0,
        double g0, // None of these were used */
        double[] newPt, double[] bestPt, double[] endPt,
        double stpMin, double stpMax) throws MaxEvaluationsExceeded {

    // Should check for input errors.
    int info; // = 0; always set in the if below
    boolean bound; // = false; always set in the if below
    double theta, gamma, p, q, r, s, stpc, stpq, stpf;
    double signG = newPt[g] * bestPt[g] / Math.abs(bestPt[g]);

    //
    // First case. A higher function value.
    // The minimum is bracketed. If the cubic step is closer
    // to stx than the quadratic step, the cubic step is taken,
    // else the average of the cubic and quadratic steps is taken.
    //
    if (newPt[f] > bestPt[f]) {
      info = 1;
      bound = true;
      theta = 3 * (bestPt[f] - newPt[f]) / (newPt[a] - bestPt[a]) + bestPt[g]
          + newPt[g];
      s = Math.max(Math.max(theta, newPt[g]), bestPt[g]);
      gamma = s
          * Math.sqrt((theta / s) * (theta / s) - (bestPt[g] / s)
              * (newPt[g] / s));
      if (newPt[a] < bestPt[a]) {
        gamma = -gamma;
      }
      p = (gamma - bestPt[g]) + theta;
      q = ((gamma - bestPt[g]) + gamma) + newPt[g];
      r = p / q;
      stpc = bestPt[a] + r * (newPt[a] - bestPt[a]);
      stpq = bestPt[a]
          + ((bestPt[g] / ((bestPt[f] - newPt[f]) / (newPt[a] - bestPt[a]) + bestPt[g])) / 2)
          * (newPt[a] - bestPt[a]);

      if (Math.abs(stpc - bestPt[a]) < Math.abs(stpq - bestPt[a])) {
        stpf = stpc;
      } else {
        stpf = stpq;
        // stpf = stpc + (stpq - stpc)/2;
      }
      bracketed = true;
      if (newPt[a] < 0.1) {
        stpf = 0.01 * stpf;
      }

    } else if (signG < 0.0) {
      //
      // Second case. A lower function value and derivatives of
      // opposite sign. The minimum is bracketed. If the cubic
      // step is closer to stx than the quadratic (secant) step,
      // the cubic step is taken, else the quadratic step is taken.
      //
      info = 2;
      bound = false;
      theta = 3 * (bestPt[f] - newPt[f]) / (newPt[a] - bestPt[a]) + bestPt[g]
          + newPt[g];
      s = Math.max(Math.max(theta, bestPt[g]), newPt[g]);
      gamma = s
          * Math.sqrt((theta / s) * (theta / s) - (bestPt[g] / s)
              * (newPt[g] / s));
      if (newPt[a] > bestPt[a]) {
        gamma = -gamma;
      }
      p = (gamma - newPt[g]) + theta;
      q = ((gamma - newPt[g]) + gamma) + bestPt[g];
      r = p / q;
      stpc = newPt[a] + r * (bestPt[a] - newPt[a]);
      stpq = newPt[a] + (newPt[g] / (newPt[g] - bestPt[g]))
          * (bestPt[a] - newPt[a]);
      if (Math.abs(stpc - newPt[a]) > Math.abs(stpq - newPt[a])) {
        stpf = stpc;
      } else {
        stpf = stpq;
      }
      bracketed = true;

    } else if (Math.abs(newPt[g]) < Math.abs(bestPt[g])) {
      //
      // Third case. A lower function value, derivatives of the
      // same sign, and the magnitude of the derivative decreases.
      // The cubic step is only used if the cubic tends to infinity
      // in the direction of the step or if the minimum of the cubic
      // is beyond stp. Otherwise the cubic step is defined to be
      // either stpmin or stpmax. The quadratic (secant) step is also
      // computed and if the minimum is bracketed then the step
      // closest to stx is taken, else the step farthest away is taken.
      //
      info = 3;
      bound = true;
      theta = 3 * (bestPt[f] - newPt[f]) / (newPt[a] - bestPt[a]) + bestPt[g]
          + newPt[g];
      s = Math.max(Math.max(theta, bestPt[g]), newPt[g]);
      gamma = s
          * Math.sqrt(Math.max(0.0, (theta / s) * (theta / s) - (bestPt[g] / s)
              * (newPt[g] / s)));
      if (newPt[a] < bestPt[a]) {
        gamma = -gamma;
      }
      p = (gamma - bestPt[g]) + theta;
      q = ((gamma - bestPt[g]) + gamma) + newPt[g];
      r = p / q;
      if (r < 0.0 && gamma != 0.0) {
        stpc = newPt[a] + r * (bestPt[a] - newPt[a]);
      } else if (newPt[a] > bestPt[a]) {
        stpc = stpMax;
      } else {
        stpc = stpMin;
      }
      stpq = newPt[a] + (newPt[g] / (newPt[g] - bestPt[g]))
          * (bestPt[a] - newPt[a]);

      if (bracketed) {
        if (Math.abs(newPt[a] - stpc) < Math.abs(newPt[a] - stpq)) {
          stpf = stpc;
        } else {
          stpf = stpq;
        }
      } else {
        if (Math.abs(newPt[a] - stpc) > Math.abs(newPt[a] - stpq)) {
          stpf = stpc;
        } else {
          stpf = stpq;
        }
      }

    } else {
      //
      // Fourth case. A lower function value, derivatives of the
      // same sign, and the magnitude of the derivative does
      // not decrease. If the minimum is not bracketed, the step
      // is either stpmin or stpmax, else the cubic step is taken.
      //
      info = 4;
      bound = false;

      if (bracketed) {
        theta = 3 * (bestPt[f] - newPt[f]) / (newPt[a] - bestPt[a]) + bestPt[g]
            + newPt[g];
        s = Math.max(Math.max(theta, bestPt[g]), newPt[g]);
        gamma = s
            * Math.sqrt((theta / s) * (theta / s) - (bestPt[g] / s)
                * (newPt[g] / s));
        if (newPt[a] > bestPt[a]) {
          gamma = -gamma;
        }
        p = (gamma - newPt[g]) + theta;
        q = ((gamma - newPt[g]) + gamma) + bestPt[g];
        r = p / q;
        stpc = newPt[a] + r * (bestPt[a] - newPt[a]);
        stpf = stpc;
      } else if (newPt[a] > bestPt[a]) {
        stpf = stpMax;
      } else {
        stpf = stpMin;
      }

    }

    //
    // Update the interval of uncertainty. This update does not
    // depend on the new step or the case analysis above.
    //
    if (newPt[f] > bestPt[f]) {
      copy(newPt, endPt);
    } else {
      if (signG < 0.0) {
        copy(bestPt, endPt);
      }
      copy(newPt, bestPt);
    }

    say(String.valueOf(info));

    //
    // Compute the new step and safeguard it.
    //
    stpf = Math.min(stpMax, stpf);
    stpf = Math.max(stpMin, stpf);
    newPt[a] = stpf;

    if (bracketed && bound) {
      if (endPt[a] > bestPt[a]) {
        newPt[a] = Math.min(bestPt[a] + p66 * (endPt[a] - bestPt[a]), newPt[a]);
      } else {
        newPt[a] = Math.max(bestPt[a] + p66 * (endPt[a] - bestPt[a]), newPt[a]);
      }
    }

    return info;
  }

  private static void copy(double[] src, double[] dest) {
    System.arraycopy(src, 0, dest, 0, src.length);
  }

  //
  //
  //
  // private double[] lineSearchNocedal(DiffFunction dfunc, double[] dir,
  // double[] x, double[] newX, double[] grad, double f0) throws
  // MaxEvaluationsExceeded {
  //
  //
  // double g0 = ArrayMath.innerProduct(grad,dir);
  // if(g0 > 0){
  // //We're looking in a direction of positive gradient. This wont' work.
  // //set dir = -grad
  // plusAndConstMult(new double[x.length],grad,-1,dir);
  // g0 = ArrayMath.innerProduct(grad,dir);
  // }
  // say("(" + nf.format(g0) + ")");
  //
  //
  // double[] newPoint = new double[3];
  // double[] prevPoint = new double[3];
  // newPoint[a] = 1.0; //Always guess 1 first, this should be right if the
  // function is "nice" and BFGS is working.
  //
  // //Special guess for the first iteration.
  // if(its == 1){
  // double aLin = - f0 / (ftol*g0);
  // //Keep aLin within aMin and 1 for the first guess. But make a more
  // intelligent guess based off the gradient
  // aLin = Math.min(1.0, aLin);
  // aLin = Math.max(aMin, aLin);
  // newPoint[a] = aLin; // Guess low at first since we have no idea of scale at
  // first.
  // }
  //
  // prevPoint[a] = 0.0;
  // prevPoint[f] = f0;
  // prevPoint[g] = g0;
  //
  // int cnt = 0;
  //
  // do{
  // newPoint[f] = dfunc.valueAt((plusAndConstMult(x, dir, newPoint[a], newX)));
  // newPoint[g] = ArrayMath.innerProduct(dfunc.derivativeAt(newX),dir);
  // fevals += 1;
  //
  // //If fNew > f0 + small*aNew*g0 or fNew > fPrev
  // if( (newPoint[f] > f0 + ftol*newPoint[a]*g0) || newPoint[f] > prevPoint[f]
  // ){
  // //We know there must be a point that satisfies the strong wolfe conditions
  // between
  // //the previous and new point, so search between these points.
  // say("->");
  // return zoom(dfunc,x,dir,newX,f0,g0,prevPoint,newPoint);
  // }
  //

  // //Here we check if the magnitude of the gradient has decreased, if
  // //it is more negative we can expect to find a much better point
  // //by stepping a little farther.
  //
  // //If |gNew| < 0.9999 |g0|
  // if( Math.abs(newPoint[g]) <= -gtol*g0 ){
  // //This is exactly what we wanted
  // return newPoint;
  // }
  //
  // if (newPoint[g] > 0){
  // //Hmm, our step is too big to be a satisfying point; let's look backwards.
  // say("<-");//say("^");
  //
  // return zoom(dfunc,x,dir,newX,f0,g0,newPoint,prevPoint);
  // }
  //
  // //if we made it here, our function value has decreased enough, but the
  // gradient is more negative.
  // //we should increase our step size, since we have potential to decrease the
  // function
  // //value a lot more.
  // newPoint[a] *= 10; // this is stupid, we should interpolate it. since we
  // already have info for quadratic at least.
  // newPoint[f] = Double.NaN;
  // newPoint[g] = Double.NaN;
  // cnt +=1;
  // say("*");
  //
  // //if(cnt > 10 || fevals > maxFevals){
  // if(fevals > maxFevals){ throw new MaxEvaluationsExceeded(" Exceeded during
  // zoom() Function ");}
  //
  // if(newPoint[a] > aMax){
  // System.err.println(" max stepsize reached. This is unusual. ");
  // System.exit(1);
  // }
  //
  // }while(true);
  //
  // }

  // private double interpolate( double[] point0, double[] point1){
  // double newAlpha;

  // double intvl = Math.abs(point0[a] -point1[a]);

  // //if(point2 == null){
  // if( Double.isNaN(point0[g]) ){
  // //We don't know the gradient at aLow so do bisection
  // newAlpha = 0.5*(point0[a] + point1[a]);
  // }else{
  // //We know the gradient so do Quadratic 2pt
  // newAlpha = interpolateQuadratic2pt(point0,point1);
  // }

  // //If the newAlpha is outside of the bounds just do bisection.

  // if( ((newAlpha > point0[a]) && (newAlpha > point1[a])) ||
  // ((newAlpha < point0[a]) && (newAlpha < point1[a])) ){

  // //bisection.
  // return 0.5*(point0[a] + point1[a]);
  // }

  // //If we aren't moving fast enough, revert to bisection.
  // if( ((newAlpha/intvl) < 1e-6) || ((newAlpha/intvl) > (1- 1e-6)) ){
  // //say("b");
  // return 0.5*(point0[a] + point1[a]);
  // }

  // return newAlpha;
  // }

  /*
   * private double interpolate( List<double[]> pointList ,) {
   *
   * int n = pointList.size(); double newAlpha = 0.0;
   *
   * if( n > 2){ newAlpha =
   * interpolateCubic(pointList.get(0),pointList.get(n-2),pointList.get(n-1));
   * }else if(n == 2){
   *
   * //Only have two points
   *
   * if( Double.isNaN(pointList.get(0)[gInd]) ){ // We don't know the gradient at
   * aLow so do bisection newAlpha = 0.5*(pointList.get(0)[aInd] +
   * pointList.get(1)[aInd]); }else{ // We know the gradient so do Quadratic 2pt
   * newAlpha = interpolateQuadratic2pt(pointList.get(0),pointList.get(1)); }
   *
   * }else { //not enough info to interpolate with!
   * System.err.println("QNMinimizer:interpolate() attempt to interpolate with
   * only one point."); System.exit(1); }
   *
   * return newAlpha;
   *  }
   */

  // Returns the minimizer of a quadratic running through point (a0,f0) with
  // derivative g0 and passing through (a1,f1).
  // private double interpolateQuadratic2pt(double[] pt0, double[] pt1){
  // if( Double.isNaN(pt0[g]) ){
  // System.err.println("QNMinimizer:interpolateQuadratic - Gradient at point
  // zero doesn't exist, interpolation failed");
  // System.exit(1);
  // }
  // double aDif = pt1[a]-pt0[a];
  // double fDif = pt1[f]-pt0[f];
  // return (- pt0[g]*aDif*aDif)/(2*(fDif-pt0[g]*aDif)) + pt0[a];
  // }
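
  // Illustrative sketch, not part of the original class: the 2-point
  // quadratic interpolation described above, in standalone form. Fitting
  // q(t) = f0 + g0*(t - a0) + c*(t - a0)^2 through (a0, f0) with slope g0
  // and through (a1, f1) gives c = (f1 - f0 - g0*d) / d^2 with d = a1 - a0,
  // so the minimizer is a0 - g0*d*d / (2*(f1 - f0 - g0*d)), the expression
  // used in the commented-out code. The method name is an assumption.
  private static double quadraticMinimizer(double a0, double f0, double g0,
      double a1, double f1) {
    double d = a1 - a0;
    double denom = 2.0 * (f1 - f0 - g0 * d);
    if (Double.isNaN(g0) || denom == 0.0) {
      // Unknown slope or a degenerate (linear) fit: fall back to bisection,
      // mirroring the fallback in the commented-out interpolate() above.
      return 0.5 * (a0 + a1);
    }
    return a0 - g0 * d * d / denom;
  }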
  // private double interpolateCubic(double[] pt0, double[] pt1, double[] pt2){
  // double a0 = pt1[a]-pt0[a];
  // double a1 = pt2[a]-pt0[a];
  // double f0 = pt1[f]-pt0[f];
  // double f1 = pt2[f]-pt0[f];
  // double g0 = pt0[g];
  // double[][] mat = new double[2][2];
  // double[] rhs = new double[2];
  // double[] coefs = new double[2];
  // double scale = 1/(a0*a0*a1*a1*(a1-a0));
  // mat[0][0] = a0*a0;
  // mat[0][1] = -a1*a1;
  // mat[1][0] = -a0*a0*a0;
  // mat[1][1] = a1*a1*a1;
  // rhs[0] = f1 - g0*a1;
  // rhs[1] = f0 - g0*a0;
  // for(int i=0;i<2;i++){
  // for(int j=0;j<2;j++){
  // coefs[i] += mat[i][j]*rhs[j];
  // }
  // coefs[i] *= scale;
  // }
  // double a = coefs[0];
  // double b = coefs[1];
  // double root = b*b-3*a*g0;
  // if( root < 0 ){
  // System.err.println("QNminimizer:interpolateCubic - interpolate failed");
  // System.exit(1);
  // }
  // return (-b+Math.sqrt(root))/(3*a);
  // }

  // private double[] zoom(DiffFunction dfunc, double[] x, double[] dir,
  // double[] newX, double f0, double g0, double[] bestPoint, double[] endPoint)
  // throws MaxEvaluationsExceeded {
  // return zoom(dfunc,x, dir, newX,f0,g0, bestPoint, endPoint,null);
  // }

  // private double[] zoom(DiffFunction dfunc, double[] x, double[] dir,
  // double[] newX, double f0, double g0, double[] bestPt, double[] endPt,
  // double[] newPt) throws MaxEvaluationsExceeded {
  // double width = Math.abs(bestPt[a] - endPt[a]);
  // double reduction = 1.0;
  // double p66 = 0.66;
  // int info = 0;
  // double stpf;
  // double theta,gamma,s,p,q,r,stpc,stpq;
  // boolean bound = false;
  // boolean bracketed = false;
  // int cnt = 1;
  // if(newPt == null){ newPt = new double[3]; newPt[a] =
  // interpolate(bestPt,endPt);}// quadratic interp

  // do{
  // say(".");
  // newPt[f] = dfunc.valueAt((plusAndConstMult(x, dir, newPt[a] , newX)));
  // newPt[g] = ArrayMath.innerProduct(dfunc.derivativeAt(newX),dir);
  // fevals += 1;
  // //If we have satisfied Wolfe...
  // //fNew <= f0 + small*aNew*g0
  // //|gNew| <= 0.9999*|g0|
  // //return the point.
  // if( (newPt[f] <= f0 + ftol*newPt[a]*g0) && Math.abs(newPt[g]) <= -gtol*g0
  // ){
  // //Sweet, we found a point that satisfies the strong Wolfe conditions!!!
  // Let's return it.
  // return newPt;
  // }else{

  // double signG = newPt[g]*bestPt[g]/Math.abs(bestPt[g]);
  // //Our new point has a higher function value
  // if( newPt[f] > bestPt[f]){
  // info = 1;
  // bound = true;
  // theta = 3*(bestPt[f] - newPt[f])/(newPt[a] - bestPt[a]) + bestPt[g] +
  // newPt[g];
  // s = Math.max(Math.max(theta,newPt[g]), bestPt[g]);
  // gamma = s*Math.sqrt( (theta/s)*(theta/s) - (bestPt[g]/s)*(newPt[g]/s) );
  // if (newPt[a] < bestPt[a]){
  // gamma = -gamma;
  // }
  // p = (gamma - bestPt[g]) + theta;
  // q = ((gamma-bestPt[g]) + gamma) + newPt[g];
  // r = p/q;
  // stpc = bestPt[a] + r*(newPt[a] - bestPt[a]);
  // stpq = bestPt[a] +
  // ((bestPt[g]/((bestPt[f]-newPt[f])/(newPt[a]-bestPt[a])+bestPt[g]))/2)*(newPt[a]
  // - bestPt[a]);
  // if ( Math.abs(stpc-bestPt[a]) < Math.abs(stpq - bestPt[a] )){
  // stpf = stpc;
  // } else{
  // stpf = stpq;
  // //stpf = stpc + (stpq - stpc)/2;
  // }
  // bracketed = true;
  // if (newPt[a] < 0.1){
  // stpf = 0.01*stpf;
  // }

  // } else if (signG < 0.0){
  // info = 2;
  // bound = false;
  // theta = 3*(bestPt[f] - newPt[f])/(newPt[a] - bestPt[a]) + bestPt[g] +
  // newPt[g];
  // s = Math.max(Math.max(theta,bestPt[g]),newPt[g]);
  // gamma = s*Math.sqrt((theta/s)*(theta/s) - (bestPt[g]/s)*(newPt[g]/s));
  // if (newPt[a] > bestPt[a]) {
  // gamma = -gamma;
  // }
  // p = (gamma - newPt[g]) + theta;
  // q = ((gamma - newPt[g]) + gamma) + bestPt[g];
  // r = p/q;
  // stpc = newPt[a] + r*(bestPt[a] - newPt[a]);
  // stpq = newPt[a] + (newPt[g]/(newPt[g]-bestPt[g]))*(bestPt[a] - newPt[a]);
  // if (Math.abs(stpc-newPt[a]) > Math.abs(stpq-newPt[a])){
  // stpf = stpc;
  // } else {
  // stpf = stpq;
  // }
  // bracketed = true;
  // } else if ( Math.abs(newPt[g]) < Math.abs(bestPt[g])){
  // info = 3;
  // bound = true;
  // theta = 3*(bestPt[f] - newPt[f])/(newPt[a] - bestPt[a]) + bestPt[g] +
  // newPt[g];
  // s = Math.max(Math.max(theta,bestPt[g]),newPt[g]);
  // gamma = s*Math.sqrt(Math.max(0.0,(theta/s)*(theta/s) -
  // (bestPt[g]/s)*(newPt[g]/s)));
  // if (newPt[a] < bestPt[a]){
  // gamma = -gamma;
  // }
  // p = (gamma - bestPt[g]) + theta;
  // q = ((gamma-bestPt[g]) + gamma) + newPt[g];
  // r = p/q;
  // if (r < 0.0 && gamma != 0.0){
  // stpc = newPt[a] + r*(bestPt[a] - newPt[a]);
  // } else if (newPt[a] > bestPt[a]){
  // stpc = aMax;
  // } else{
  // stpc = aMin;
  // }
  // stpq = newPt[a] + (newPt[g]/(newPt[g]-bestPt[g]))*(bestPt[a] - newPt[a]);
  // if(bracketed){
  // if (Math.abs(newPt[a]-stpc) < Math.abs(newPt[a]-stpq)){
  // stpf = stpc;
  // } else {
  // stpf = stpq;
  // }
  // } else{
  // if (Math.abs(newPt[a]-stpc) > Math.abs(newPt[a]-stpq)){
  // stpf = stpc;
  // } else {
  // stpf = stpq;
  // }
  // }

  // }else{
  // info = 4;
  // bound = false;
  // if (bracketed){
  // theta = 3*(bestPt[f] - newPt[f])/(newPt[a] - bestPt[a]) + bestPt[g] +
  // newPt[g];
  // s = Math.max(Math.max(theta,bestPt[g]),newPt[g]);
  // gamma = s*Math.sqrt((theta/s)*(theta/s) - (bestPt[g]/s)*(newPt[g]/s));
  // if (newPt[a] > bestPt[a]) {
  // gamma = -gamma;
  // }
  // p = (gamma - newPt[g]) + theta;
  // q = ((gamma - newPt[g]) + gamma) + bestPt[g];
  // r = p/q;
  // stpc = newPt[a] + r*(bestPt[a] - newPt[a]);
  // stpf = stpc;
  // }else if( newPt[a] > bestPt[a]){
  // stpf = aMax;
  // } else {
  // stpf = aMin;
  // }
  // }
  // //Reduce the interval of uncertainty
  // if (newPt[f] > bestPt[f]) {
  // copy(newPt,endPt);
  // }else{
  // if (signG < 0.0){
  // copy(bestPt,endPt);
  // }
  // copy(newPt,bestPt);
  // }
  // say("" + info );
  // newPt[a] = stpf;
  // if(bracketed && bound){
  // if (endPt[a] > bestPt[a]){
  // newPt[a] = Math.min(bestPt[a]+p66*(endPt[a]-bestPt[a]),newPt[a]);
  // }else{
  // newPt[a] = Math.max(bestPt[a]+p66*(endPt[a]-bestPt[a]),newPt[a]);
  // }
  // }
  // }
  // //Check to see if the step has reached an extreme.
  // newPt[a] = Math.max(aMin, newPt[a]);
  // newPt[a] = Math.min(aMax,newPt[a]);
  // if( newPt[a] == aMin || newPt[a] == aMax){
  // return newPt;
  // }
  // cnt +=1;
  // if(fevals > maxFevals){
  // throw new MaxEvaluationsExceeded(" Exceeded during zoom() Function ");}
  // }while(true);
  // }
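
  // Illustrative sketch, not part of the original class: the strong Wolfe
  // test that the commented-out zoom()/line search code checks repeatedly,
  // with ftol and gtol in the same roles as above (sufficient decrease and
  // curvature tolerances). The method name is an assumption for exposition.
  private static boolean satisfiesStrongWolfe(double f0, double g0,
      double alpha, double fNew, double gNew, double ftol, double gtol) {
    // Sufficient decrease (Armijo): f(x + alpha*dir) <= f0 + ftol*alpha*g0.
    boolean sufficientDecrease = fNew <= f0 + ftol * alpha * g0;
    // Curvature: |g(x + alpha*dir)'dir| <= gtol*|g0|; g0 < 0 along a descent
    // direction, hence the -gtol*g0 form used in the code above.
    boolean curvature = Math.abs(gNew) <= -gtol * g0;
    return sufficientDecrease && curvature;
  }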
  // private double[] zoom2(DiffFunction dfunc, double[] x, double[] dir,
  // double[] newX, double f0, double g0, double[] bestPoint, double[] endPoint)
  // throws MaxEvaluationsExceeded {
  //
  // double[] newPoint = new double[3];
  // double width = Math.abs(bestPoint[a] - endPoint[a]);
  // double reduction = 0.0;
  //
  // int cnt = 1;
  //
  // //make sure the interval reduces enough.
  // //if(reduction >= 0.66){
  // //say(" |" + nf.format(reduction)+"| ");
  // //newPoint[a] = 0.5*(bestPoint[a]+endPoint[a]);
  // //} else{
  // newPoint[a] = interpolate(bestPoint,endPoint);// quadratic interp
  // //}
  //
  // do{
  // //Check to see if the step has reached an extreme.
  // newPoint[a] = Math.max(aMin, newPoint[a]);
  // newPoint[a] = Math.min(aMax,newPoint[a]);
  //
  // newPoint[f] = dfunc.valueAt((plusAndConstMult(x, dir, newPoint[a] ,
  // newX)));
  // newPoint[g] = ArrayMath.innerProduct(dfunc.derivativeAt(newX),dir);
  // fevals += 1;
  //
  // //fNew > f0 + small*aNew*g0 or fNew > fLow
  // if( (newPoint[f] > f0 + ftol*newPoint[a]*g0) || newPoint[f] > bestPoint[f]
  // ){
  // //Our new point didn't beat the best point, so just reduce the interval
  // copy(newPoint,endPoint);
  // say(".");//say("l");
  // }else{
  //
  // //if |gNew| <= 0.9999*|g0| If gNew is slightly smaller than g0
  // if( Math.abs(newPoint[g]) <= -gtol*g0 ){
  // //Sweet, we found a point that satisfies the strong Wolfe conditions!!!
  // Let's return it.
  // return newPoint;
  // }
  //
  // //If we made it this far, we've found a point that has satisfied descent,
  // but hasn't satisfied
  // //the decrease in gradient. if the new gradient is telling us >0 we need to
  // look behind us
  // //if the new gradient is negative still we can increase the step.
  // if(newPoint[g]*(endPoint[a] - bestPoint[a] ) >= 0){
  // //Get going the right way.
  // say(".");//say("f");
  // copy(bestPoint,endPoint);
  // }
  //
  // if( (Math.abs(newPoint[a]-bestPoint[a]) < 1e-6) ||
  // (Math.abs(newPoint[a]-endPoint[a]) < 1e-6) ){
  // //Not moving fast enough.
  // sayln("had to improvise a bit");
  // newPoint[a] = 0.5*(bestPoint[a] + endPoint[a]);
  // }
  //
  // say(".");//say("r");
  // copy(newPoint,bestPoint);
  // }
  //
  //
  // if( newPoint[a] == aMin || newPoint[a] == aMax){
  // return newPoint;
  // }
  //
  // reduction = Math.abs(bestPoint[a] - endPoint[a]) / width;
  // width = Math.abs(bestPoint[a] - endPoint[a]);
  //
  // cnt +=1;
  //
  //
  // //if(Math.abs(bestPoint[a] -endPoint[a]) < 1e-12 ){
  // //sayln();
  // //sayln("!!!!!!!!!!!!!!!!!!");
  // //sayln("points are too close");
  // //sayln("!!!!!!!!!!!!!!!!!!");
  // //sayln("f0 " + nf.format(f0));
  // //sayln("f0+crap " + nf.format(f0 + cVal*bestPoint[a]*g0));
  // //sayln("g0 " + nf.format(g0));
  // //sayln("ptLow");
  // //printPt(bestPoint);
  // //sayln();
  // //sayln("ptHigh");
  // //printPt(endPoint);
  // //sayln();
  //
  // //DiffFunctionTester.test(dfunc, x,1e-4);
  // //System.exit(1);
  // ////return dfunc.valueAt((plusAndConstMult(x, dir, aMin , newX)));
  // //}
  //
  // //if( (cnt > 20) ){
  //
  // //sayln("!!!!!!!!!!!!!!!!!!");
  // //sayln("! " + cnt + " iterations. I think we're out of luck");
  // //sayln("!!!!!!!!!!!!!!!!!!");
  // //sayln("f0" + nf.format(f0));
  // //sayln("f0+crap" + nf.format(f0 + cVal*bestPoint[a]*g0));
  // //sayln("g0 " + nf.format(g0));
  // //sayln("bestPoint");
  // //printPt(bestPoint);
  // //sayln();
  // //sayln("ptHigh");
  // //printPt(endPoint);
  // //sayln();
  //
  //
  //
  // ////if( cnt > 25 || fevals > maxFevals){
  // ////System.err.println("Max evaluations exceeded.");
  // ////System.exit(1);
  // ////return dfunc.valueAt((plusAndConstMult(x, dir, aMin , newX)));
  // ////}
  // //}
  //
  // if(fevals > maxFevals){ throw new MaxEvaluationsExceeded(" Exceeded during
  // zoom() Function ");}
  //
  // }while(true);
  //
  // }
  //
  // private double lineSearchNocedal(DiffFunction dfunc, double[] dir, double[]
  // x, double[] newX, double[] grad, double f0, int maxEvals){
  //
  // boolean bracketed = false;
  // boolean stage1 = false;
  // double width = aMax - aMin;
  // double width1 = 2*width;
  // double stepMin = 0.0;
  // double stepMax = 0.0;
  // double xtrapf = 4.0;
  // int nFevals = 0;
  // double TOL = 1e-4;
  // double X_TOL = 1e-8;
  // int info = 0;
  // int infoc = 1;
  //
  // double g0 = ArrayMath.innerProduct(grad,dir);
  // if(g0 > 0){
  // //We're looking in a direction of positive gradient. This won't work.
  // //set dir = -grad
  // plusAndConstMult(new double[x.length],grad,-1,dir);
  // g0 = ArrayMath.innerProduct(grad,dir);
  // System.err.println("Searching in direction of positive gradient.");
  // }
  // say("(" + nf.format(g0) + ")");
  //
  //
  // double[] newPt = new double[3];
  // double[] bestPt = new double[3];
  // double[] endPt = new double[3];
  //
  // newPt[a] = 1.0; //Always guess 1 first, this should be right if the
  // function is "nice" and BFGS is working.
  //
  // if(its == 1){
  // newPt[a] = 1e-6; // Guess low at first since we have no idea of scale.
  // }
  //
  // bestPt[a] = 0.0;
  // bestPt[f] = f0;
  // bestPt[g] = g0;
  //
  // endPt[a] = 0.0;
  // endPt[f] = f0;
  // endPt[g] = g0;
  //
  // int cnt = 0;
  //
  // do{
  // //Determine the max and min step size given what we know already.
  // if(bracketed){
  // stepMin = Math.min(bestPt[a], endPt[a]);
  // stepMax = Math.max(bestPt[a], endPt[a]);
  // } else{
  // stepMin = bestPt[a];
  // stepMax = newPt[a] + xtrapf*(newPt[a] - bestPt[a]);
  // }
  //
  // //Make sure our next guess is within the bounds
  // newPt[a] = Math.max(newPt[a], stepMin);
  // newPt[a] = Math.min(newPt[a], stepMax);
  //
  // if( (bracketed && (newPt[a] <= stepMin || newPt[a] >= stepMax) )
  // || nFevals > maxEvals || (bracketed & (stepMax-stepMin) <= TOL*stepMax)){
  // System.err.println("Linesearch for QN, Need to make srue that newX is set
  // before returning bestPt. -akleeman");
  // System.exit(1);
  // return bestPt[f];
  // }
  //
  //
  // newPt[f] = dfunc.valueAt((plusAndConstMult(x, dir, newPt[a], newX)));
  // newPt[g] = ArrayMath.innerProduct(dfunc.derivativeAt(newX),dir);
  // nFevals += 1;
  //
  // double fTest = f0 + newPt[a]*g0;
  //
  // System.err.println("fTest " + fTest + " new" + newPt[a] + " newf" +
  // newPt[f] + " newg" + newPt[g] );
  //
  // if( ( bracketed && (newPt[a] <= stepMin | newPt[a] >= stepMax )) || infoc
  // == 0){
  // info = 6;
  // }
  //
  // if( newPt[a] == stepMax && ( newPt[f] <= fTest || newPt[g] >= ftol*g0 )){
  // info = 5;
  // }
  //
  // if( (newPt[a] == stepMin && ( newPt[f] > fTest || newPt[g] >= ftol*g0 ) )){
  // info = 4;
  // }
  //
  // if( (nFevals >= maxEvals)){
  // info = 3;
  // }
  //
  // if( bracketed && stepMax-stepMin <= X_TOL*stepMax){
  // info = 2;
  // }
  //
  // if( (newPt[f] <= fTest) && (Math.abs(newPt[g]) <= - gtol*g0) ){
  // info = 1;
  // }
  //
  // if(info != 0){
  // return newPt[f];
  // }
  //
  // if(stage1 && newPt[f]< fTest && newPt[g] >= ftol*g0){
  // stage1 = false;
  // }
  //
  //
  // if( stage1 && f<= bestPt[f] && f > fTest){
  //
  // double[] newPtMod = new double[3];
  // double[] bestPtMod = new double[3];
  // double[] endPtMod = new double[3];
  //
  // newPtMod[f] = newPt[f] - newPt[a]*ftol*g0;
  // newPtMod[g] = newPt[g] - ftol*g0;
  // bestPtMod[f] = bestPt[f] - bestPt[a]*ftol*g0;
  // bestPtMod[g] = bestPt[g] - ftol*g0;
  // endPtMod[f] = endPt[f] - endPt[a]*ftol*g0;
  // endPtMod[g] = endPt[g] - ftol*g0;
  //
  // //this.cstep(newPtMod, bestPtMod, endPtMod, bracketed);
  //
  // bestPt[f] = bestPtMod[f] + bestPt[a]*ftol*g0;
  // bestPt[g] = bestPtMod[g] + ftol*g0;
  // endPt[f] = endPtMod[f] + endPt[a]*ftol*g0;
  // endPt[g] = endPtMod[g] + ftol*g0;
  //
  // }else{
  // //this.cstep(newPt, bestPt, endPt, bracketed);
  // }
  //
  // double p66 = 0.66;
  // double p5 = 0.5;
  //
  // if(bracketed){
  // if ( Math.abs(endPt[a] - bestPt[a]) >= p66*width1){
  // newPt[a] = bestPt[a] + p5*(endPt[a]-bestPt[a]);
  // }
  // width1 = width;
  // width = Math.abs(endPt[a]-bestPt[a]);
  // }
  //
  //
  //
  // }while(true);
  //
  // }
  //
  // private double cstepBackup( double[] newPt, double[] bestPt, double[]
  // endPt, boolean bracketed ){
  //
  // double p66 = 0.66;
  // int info = 0;
  // double stpf;
  // double theta,gamma,s,p,q,r,stpc,stpq;
  // boolean bound = false;
  //
  // double signG = newPt[g]*bestPt[g]/Math.abs(bestPt[g]);
  //
  //
  // //Our new point has a higher function value
  // if( newPt[f] > bestPt[f]){
  // info = 1;
  // bound = true;
  // theta = 3*(bestPt[f] - newPt[f])/(newPt[a] - bestPt[a]) + bestPt[g] +
  // newPt[g];
  // s = Math.max(Math.max(theta,newPt[g]), bestPt[g]);
  // gamma = s*Math.sqrt( (theta/s)*(theta/s) - (bestPt[g]/s)*(newPt[g]/s) );
  // if (newPt[a] < bestPt[a]){
  // gamma = -gamma;
  // }
  // p = (gamma - bestPt[g]) + theta;
  // q = ((gamma-bestPt[g]) + gamma) + newPt[g];
  // r = p/q;
  // stpc = bestPt[a] + r*(newPt[a] - bestPt[a]);
  // stpq = bestPt[a] +
  // ((bestPt[g]/((bestPt[f]-newPt[f])/(newPt[a]-bestPt[a])+bestPt[g]))/2)*(newPt[a]
  // - bestPt[a]);
  //
  // if ( Math.abs(stpc-bestPt[a]) < Math.abs(stpq - bestPt[a] )){
  // stpf = stpc;
  // } else{
  // stpf = stpc + (stpq - stpc)/2;
  // }
  // bracketed = true;
  //
  // } else if (signG < 0.0){
  //
  // info = 2;
  // bound = false;
  // theta = 3*(bestPt[f] - newPt[f])/(newPt[a] - bestPt[a]) + bestPt[g] +
  // newPt[g];
  // s = Math.max(Math.max(theta,bestPt[g]),newPt[g]);
  // gamma = s*Math.sqrt((theta/s)*(theta/s) - (bestPt[g]/s)*(newPt[g]/s));
  // if (newPt[a] > bestPt[a]) {
  // gamma = -gamma;
  // }
  // p = (gamma - newPt[g]) + theta;
  // q = ((gamma - newPt[g]) + gamma) + bestPt[g];
  // r = p/q;
  // stpc = newPt[a] + r*(bestPt[a] - newPt[a]);
  // stpq = newPt[a] + (newPt[g]/(newPt[g]-bestPt[g]))*(bestPt[a] - newPt[a]);
  // if (Math.abs(stpc-newPt[a]) > Math.abs(stpq-newPt[a])){
  // stpf = stpc;
  // } else {
  // stpf = stpq;
  // }
  // bracketed = true;
  // } else if ( Math.abs(newPt[g]) < Math.abs(bestPt[g])){
  // info = 3;
  // bound = true;
  // theta = 3*(bestPt[f] - newPt[f])/(newPt[a] - bestPt[a]) + bestPt[g] +
  // newPt[g];
  // s = Math.max(Math.max(theta,bestPt[g]),newPt[g]);
  // gamma = s*Math.sqrt(Math.max(0.0,(theta/s)*(theta/s) -
  // (bestPt[g]/s)*(newPt[g]/s)));
  // if (newPt[a] < bestPt[a]){
  // gamma = -gamma;
  // }
  // p = (gamma - bestPt[g]) + theta;
  // q = ((gamma-bestPt[g]) + gamma) + newPt[g];
  // r = p/q;
  // if (r < 0.0 && gamma != 0.0){
  // stpc = newPt[a] + r*(bestPt[a] - newPt[a]);
  // } else if (newPt[a] > bestPt[a]){
  // stpc = aMax;
  // } else{
  // stpc = aMin;
  // }
  // stpq = newPt[a] + (newPt[g]/(newPt[g]-bestPt[g]))*(bestPt[a] - newPt[a]);
  // if (bracketed){
  // if (Math.abs(newPt[a]-stpc) < Math.abs(newPt[a]-stpq)){
  // stpf = stpc;
  // } else {
  // stpf = stpq;
  // }
  // } else {
  // if (Math.abs(newPt[a]-stpc) > Math.abs(newPt[a]-stpq)){
  // System.err.println("modified to take only quad");
  // stpf = stpq;
  // }else{
  // stpf = stpq;
  // }
  // }
  //
  //
  // }else{
  // info = 4;
  // bound = false;
  //
  // if(bracketed){
  // theta = 3*(bestPt[f] - newPt[f])/(newPt[a] - bestPt[a]) + bestPt[g] +
  // newPt[g];
  // s = Math.max(Math.max(theta,bestPt[g]),newPt[g]);
  // gamma = s*Math.sqrt((theta/s)*(theta/s) - (bestPt[g]/s)*(newPt[g]/s));
  // if (newPt[a] > bestPt[a]) {
  // gamma = -gamma;
  // }
  // p = (gamma - newPt[g]) + theta;
  // q = ((gamma - newPt[g]) + gamma) + bestPt[g];
  // r = p/q;
  // stpc = newPt[a] + r*(bestPt[a] - newPt[a]);
  // stpf = stpc;
  // }else if (newPt[a] > bestPt[a]){
  // stpf = aMax;
  // }else{
  // stpf = aMin;
  // }
  //
  // }
  //
  //
  // if (newPt[f] > bestPt[f]) {
  // copy(newPt,endPt);
  // }else{
  // if (signG < 0.0){
  // copy(bestPt,endPt);
  // }
  // copy(newPt,bestPt);
  // }
  //
  // stpf = Math.min(aMax,stpf);
  // stpf = Math.max(aMin,stpf);
  // newPt[a] = stpf;
  // if (bracketed & bound){
  // if (endPt[a] > bestPt[a]){
  // newPt[a] = Math.min(bestPt[a]+p66*(endPt[a]-bestPt[a]),newPt[a]);
  // }else{
  // newPt[a] = Math.max(bestPt[a]+p66*(endPt[a]-bestPt[a]),newPt[a]);
  // }
  // }
  //
  // //newPt[f] =
  // System.err.println("cstep " + nf.format(newPt[a]) + " info " + info);
  // return newPt[a];
  //
  // }

}