// Output-layer delta: -(y - h) .* f'(z^l), where y is the labels, h is activations.get(i), and l is the output layer
//delta = labels.sub(activations.get(i)).neg().mul(softMaxDerivative.applyDerivative(activations.get(i)));
Matrix tmpDelta = MatrixUtils.neg( labels.minus( activations.get( i ) ) );
delta = MatrixUtils.elementWiseMultiplication( tmpDelta, softMaxDerivative.applyDerivative(activations.get(i)) );
deltas[i] = delta;
} else {