function [x, f, optcert, iters] = gradsamp(pars, options)
% Gradient Sampling method to minimize a function f 
% Assumptions: f is continuous everywhere, differentiable almost everywhere
% Note: gradient may be discontinuous, and often is at local minimizers
% Call: [x, f, optcert, iters] = gradsamp(pars, options)
% Input: 
%  pars (required): a struct (structure array) defining the problem
%    Required fields for pars:
%      pars.m:       number of variables
%      pars.fgname:  name of m-file to compute function and gradient, given x
%        (in the probability zero event that f is not differentiable at x,
%        it should ideally return a generalized gradient or subgradient: thus,
%        when coding gradient, don't worry about "ties" in function evaluation) 
%        Calling sequence:  [f,grad] = mfilename(x, pars)
%        Actually invoked by: [f,grad] = feval(pars.fgname, x, pars)
%    Optional fields for pars:
%      Any fields needed by the user to code the function/gradient m-file,
%        with any name except pars.fcomparename
%      pars.fcomparename: see additional comments in file, not echoed by "help"
%      pars.n, pars.matfun, etc: used by various matrix functions, e.g.
%        spabsc, sprad, pspabsc, psprad, neg_csr, etc, that are available as 
%        possible choices for pars.fgname
%      
%  options (optional): a struct (structure array) defining the algorithm options
%    Required fields for options:
%       None.  
%    Optional fields for options:
%       options.x0:       each column is a starting vector of variables 
%          default: starting vectors generated randomly, see below
%       options.nruns:    number of runs (number of different starting vectors)
%          ignored if options.x0 is provided
%          default: 3
%       options.x0scale:  scaling vector (length pars.m) for multiplying
%          randomly generated starting vectors
%          ignored if options.x0 is provided
%          default: vector of all ones
%       options.samprad:  vector of sampling radii (positive and decreasing)
%          default: 10^-1, 10^-2, ... 10^-6
%       options.maxit:    max iterations for each sampling radius 
%          default: 100
%       options.dnormtol: stopping tolerance on norm(d), where d is vector with
%          smallest norm in the convex hull of the sampled gradients:
%          either a scalar or a positive, decreasing vector like options.samprad
%          default: 1e-6
%       options.ngrad:    number of sampled gradients per iterate
%          default: 2*m
%       options.xbound:   quit minimizing if ||x||_inf hits this bound 
%          default: 10^6
%       options.subprob:  subproblem to be solved each iteration
%          default: 'Q':  QP subproblem
%          alternative: 'L': LP subproblem (not recommended)
%       options.prtlevel: controls output: 0 (none), 1 (minimal), 2 (verbose)
%          default: 1
%       options.savefile: output is saved to a file with this name as each run
%          finishes
%          default: no save
%          special string 'clock': file name uses system clock time and date
% Output: 
%  x:   each column is an approximate minimizer, one for each starting vector
%  f:   each entry is the function value for the corresponding column of x
%  optcert:   each entry is a struct for a different starting vector,
%       providing an approximate local optimality certificate.
%    optcert(k).samprad is the smallest sampling radius used by run k for which
%        the norm of the vector with smallest norm in the convex hull of the
%        sampled gradients was reduced to or below the corresponding
%        tolerance specified in options.dnormtol
%    optcert(k).dnorm is the numerical value of that smallest norm.
%       If the condition was not satisfied for any sampling radius, these
%       values are set to the smallest sampling radius and the corresponding
%       final vector norm.
%  iters:  each entry is the total number of iterations used by the
%       corresponding run, summed over all sampling radii (NaN if the
%       function was NaN or +inf at the starting vector, in which case
%       f, optcert(k).dnorm and optcert(k).samprad are also NaN or f0)
% Also required:
%  Either the Matlab Optimization Toolbox or MOSEK (www.mosek.com) code
%  "quadprog" (or "linprog" if options.subprob is set to 'L').
%  Generally, MOSEK is far preferable.  To select it, use "addpath ..." to
%  add it to the front of the path.  To see which is in use, type "which quadprog".
%
%  Written by M. Overton (overton@cs.nyu.edu), last revised November 2004

%  References: J.V. Burke, A.S. Lewis and M.L. Overton,
%  -- Two Numerical Methods for Optimizing Matrix Stability,
%   Linear Algebra and its Applications 351-352 (2002), pp. 117-145, and
%  -- A Robust Gradient Sampling Algorithm for Nonsmooth, Nonconvex Optimization
%     SIAM J. Optimization, to appear

%  Additional optional field for pars:
%      pars.fcomparename: the name of an mfile which checks whether the function
%        at a candidate new point is reduced below the previous value, instead
%        of actually computing the new value.  The idea is that in some cases,
%        such as pseudospectral functions, it's much cheaper to make this check.
%        Calling sequence: smaller = mfilename(x, f0, pars),
%        where x is the candidate point, f0 is the comparison value, and
%        pars can be used to pass any other info needed. It returns 1
%        if the function value at x is smaller than f0 and 0 otherwise.
%        Actually invoked by: smaller = feval(fcomparename, x, f0, pars).
%        This mfile is called only by the algorithm's line search.
%        If it is not provided, the mfile given by pars.fgname is called 
%        instead, using a default interface.
%
% check pars and its fields, but first check options.prtlevel
if nargin == 0
   error('pars is a required input parameter, with fields m and fgname')
end;
if nargin == 1
   options = [];
end
if ~isfield(options, 'prtlevel')  % used in some of the pars fields checks
   options.prtlevel = 1;
end

if ~isfield(pars, 'm')
   error('pars.m (number of variables) is a required parameter')
end
% NOTE: the builtin isinteger tests the storage class (int8, int32, ...), not
% the value, so it rejects ordinary doubles such as 5.  The value-based local
% helper gs_isposint (end of this file) is used for all integer checks.
if ~gs_isposint(pars.m)
   error('pars.m (number of variables) must be a positive integer')
end
if ~isfield(pars, 'fgname')
   error('pars.fgname (name of file returning function and gradient) is a required parameter')
elseif options.prtlevel > 0
   fprintf('Optimizing %s\n', pars.fgname)
end
if ~isfield(pars, 'fcomparename') | strcmp(pars.fcomparename, '')
   pars.fcomparename = 'fcomparedefault';
elseif options.prtlevel > 0
   fprintf('Using %s to compare function values\n', pars.fcomparename)
end

% check fields of options
if isfield(options, 'x0')
   if size(options.x0,1) ~= pars.m
      error('options.x0 must have pars.m rows')
   end
   options.nruns = size(options.x0,2);   % one run per supplied column
elseif ~isfield(options, 'nruns')
   options.nruns = 3;
elseif ~gs_isposint(options.nruns)
   error('options.nruns must be a positive integer') 
end
if ~isfield(options, 'samprad')
   options.samprad = 10.^(-1:-1:-6);
else
   osr = options.samprad;
   % Orientation-independent check (row or column vector): every entry must
   % be positive and the sequence nonincreasing.  Using ~all(...) rather than
   % any(~...) also rejects NaN entries.
   if ~isnumeric(osr) || ~isreal(osr) || ~isvector(osr) || ...
         ~all(osr(:) > 0) || ~all(diff(osr(:)) <= 0)
      error('options.samprad must be positive and in decreasing order')
   end
end
if ~isfield(options, 'maxit')
   options.maxit = 100;
elseif ~gs_isposint(options.maxit)
   error('options.maxit must be a positive integer')
end
if ~isfield(options, 'dnormtol')
   options.dnormtol = 1e-6;   % changed to vector below
end
% options.dnormtol is allowed to be a vector because one of the convergence
% analysis results requires decreasing the tolerance on ||d|| along with the
% sampling radius.  Doesn't make much difference for the easier problems
% and may be counterproductive for harder ones.
%
% A nonscalar condition in "if" is true only when ALL elements are nonzero,
% so the positivity test is written explicitly over every element.
if ~isnumeric(options.dnormtol) || ~isreal(options.dnormtol) || ...
      ~all(options.dnormtol(:) > 0)
   error('options.dnormtol must be positive')
elseif length(options.dnormtol) == 1 
   options.dnormtol = options.dnormtol*ones(size(options.samprad));
elseif length(options.dnormtol) ~= length(options.samprad)
   error('options.dnormtol must be scalar or vector with length same as length of options.samprad');
end
if ~isfield(options, 'ngrad')
   options.ngrad = 2*pars.m;
elseif ~gs_isposint(options.ngrad)
   error('options.ngrad must be a positive integer: a number greater than > pars.m is recommended')
elseif options.ngrad == 1
   disp('since options.ngrad is 1, method reduces to steepest descent: not recommended')
elseif options.ngrad <= pars.m
   disp('options.ngrad <= pars.m: this is not recommended')
end
if ~isfield(options, 'xbound')
   options.xbound = 10^6;
elseif ~isnumeric(options.xbound) || ~isscalar(options.xbound) || ...
      ~isreal(options.xbound) || ~(options.xbound > 0)
   error('options.xbound must be a positive scalar')
end
if ~isfield(options,'x0scale')
   options.x0scale = ones(pars.m, 1);
elseif size(options.x0scale,1) ~= pars.m | size(options.x0scale,2) ~= 1 | ~isreal(options.x0scale)
   error('options.x0scale must be a column vector of length pars.m')
end
if ~isfield(options, 'subprob')
   options.subprob = 'Q';
end
% strcmp instead of ==/~= : element-wise comparison silently accepted
% values such as 'QP' or '' and treated 'LL' as 'L'.
if ~ischar(options.subprob)
   error('options.subprob is invalid')
elseif strcmp(options.subprob, 'L')
   if options.prtlevel > 0
      disp('Warning: solving LP subproblems, QP may be better choice')
   end
elseif ~strcmp(options.subprob, 'Q')
   error('options.subprob is invalid')
end
if isfield(options, 'savefile') & strcmp(options.savefile, 'clock')
   if options.prtlevel > 0
       disp('Using clock to generate save file name')
   end
   options.savefile = clock2str;
end

for run = 1:options.nruns
   if options.prtlevel > 0
      fprintf('Run Number %d \n', run);
   end
   if isfield(options,'x0')   % starting vectors provided
      x0 = options.x0(:,run);
   else                       % generate starting vectors randomly
      x0 = options.x0scale .* randn(pars.m, 1);  % scale the starting values
   end
%
% if f is infinite or NaN at the chosen initial point, mark run as a failure
%
   f0 = feval(pars.fgname, x0, pars);
   if isnan(f0) | f0 == inf
      if isnan(f0) & options.prtlevel > 0 
         disp('function is NaN at initial point')
      elseif f0 == inf & options.prtlevel > 0 
         disp('function is infinite at initial point')
      end
      f(run) = f0;
      x(:,run) = x0;
      optcert(run).dnorm = nan;
      optcert(run).samprad = nan;
      iters(run) = nan;
   else
%
% autoreduce delivers vectors xx, etc; each corresponds to a run for a
% different sampling radius 
%
      [xx, ff, dd, its]  = autoreduce(pars, x0, options);
%
% best function value is the last one  
%
      x(:,run) = xx(:,size(xx,2));
      f(run) = ff(length(ff));
      iters(run) = sum(its);    % sum over all sampling radii 
      if options.prtlevel > 0
         fprintf('final f = %g  total number of iterations = %d\n\n', ...
             f(run), iters(run))
      end
%
% find best optimality certificate: maybe for higher function value  
%
      optcert(run) = findbest(dd, options.samprad, options.dnormtol, ...
         options.prtlevel);
%
% optionally save the whole workspace in a file as each successful run
% finishes ("iters" is also returned as the fourth output argument)
%
      if isfield(options, 'savefile')
         save(options.savefile)
      end
   end
end

function tf = gs_isposint(v)
% GS_ISPOSINT  True iff v is a real, finite, numeric scalar whose VALUE is a
% whole number >= 1.  Unlike the builtin isinteger, the storage class is
% irrelevant, so ordinary doubles such as 5 are accepted.
tf = isnumeric(v) && isscalar(v) && isreal(v) && isfinite(v) && ...
     v == round(v) && v >= 1;
