%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Calculate the optimal bandwidth for functional estimation of recurrent
% processes as in Bandi et al. (2009) 
%
% This version: Oct 6th, 2009
%
% Authors: Federico M. Bandi, Valentina Corradi, Guillermo Moloche, Daniel Wilhelm.
%
% Disclaimer: This code has been debugged several times and performs satisfactorily.
% However, the authors accept no responsibility for any remaining issues. The code
% is solely provided to help potential users by facilitating empirical implementation 
% of some (not all) of the procedures in the paper "Bandwidth selection for 
% continuous-time Markov processes."
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% How to call this function:
%	optbw(X, T)
% 	optbw(X, T, R)
% 	optbw(X, T, R, [options])
%
% Required input arguments:
% 	X: 				Nx1 vector of observations
%	T: 				time horizon
%
%
% Optional input arguments:
%	R:				Number of random variables generated for randomized 
%					testing	procedure. As defined in Bandi et al. (2009).
%					Default is 500.
%                   
%
% Options:
%	h_grid_dr1:     A vector containing the grid of bandwidths over which
%					the optimal 1st stage drift bw is determined.
%					Default is linspace(0.01, 5, 10).
%	h_grid_dif1:    A vector containing the grid of bandwidths over which
%					the optimal 1st stage diffusion bw is determined.
%					Default is linspace(0.01, 5, 10).
%	h_grid_dr2:     A vector containing the grid of bandwidths over which
%					the optimal 2nd stage drift bw is determined.
%					Default is linspace(0.01, 10, 100).
%	h_grid_dif2:    A vector containing the grid of bandwidths over which
%					the optimal 2nd stage diffusion bw is determined.
%					Default is linspace(0.01, 10, 100).
%	kernel:			One of the following strings, 'TR','BT','PR','QS','TH'
%					or 'NO' naming the kernel function to be used for non-
%					parametric estimation (truncated, Bartlett, Parzen, 
%					Quadratic spectral, Tukey-Hanning or normal, resp.).
%					The normal kernel is default.
%	epsilon:		Epsilon parameter defined in Bandi et al. (2009).
%					Default is 0.01.
%	U:				1x2 vector defining the lower and upper bound of the
%					interval U defined in Bandi et al. (2009). Default is
%					[-1.5 2.5].
%	conf_lev:		Confidence level of the test. Defaul is 0.05.
%	p:				Weighting function pi:U->[0,inf) defined in Bandi et
%					al. (2009). Default is the standard normal pdf.
%	criterion:		1st stage criterion used to find optimal bandwidths.
%					'resid' for the residual-based method by Bandi et al.
%					(2009) or 'CV' for cross-validation. Default is 'resid'.
%
% Outputs:
%	h:				2x2 matrix containing the optimal 1st stage (1st row)
%					and optimal 2nd stage (2nd row) bw's. The 1st column
%					contains the drift and the 2nd column the diffusion bw's.
%	V:				2x2 matrix containing the test statistic V calculated 
%					using the 1st stage (1st row) and 2nd stage (2nd row)
%					bw's. The 1st column refers to the drift and the 2nd to 
%					the diffusion.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function [h, V] = optbw(X, T, varargin)
	
	% ------------------------------------------------------------------------
	% initializations
	% ------------------------------------------------------------------------
	
	% configure the inputParser
	iP = inputParser;
	iP.addRequired('X', @isnumeric);
	iP.addRequired('T', @isnumeric);
	iP.addOptional('R', 500, @isnumeric);
	iP.addParamValue('h_grid_dr1', linspace(0.01, 5, 10), @isnumeric);
	iP.addParamValue('h_grid_dif1', linspace(0.01, 5, 10), @isnumeric);
	iP.addParamValue('h_grid_dr2', linspace(0.01, 10, 100), @isnumeric);
	iP.addParamValue('h_grid_dif2', linspace(0.01, 10, 100), @isnumeric);
	iP.addParamValue('kernel', 'NO', @(x) any(strcmpi(x, {'TR','BT','PR','QS','TH'})));
	iP.addParamValue('criterion', 'resid', @(x) any(strcmpi(x, {'resid','CV'})));
	iP.addParamValue('epsilon', 0.01, @isnumeric);
	iP.addParamValue('U', [-1.5 2.5], @isnumeric);
	iP.addParamValue('conf_lev', 0.05, @isnumeric);
	iP.addParamValue('p', @normpdf);
	
	iP.parse(X, T, varargin{:});
	
	R = iP.Results.R;
	switch iP.Results.kernel
		case 'TR'
			K = @k_TR;
		case 'BT'
			K = @k_BT;
		case 'PR'
			K = @k_PR;
		case 'QS'
			K = @k_QS;
		case 'TH'
			K = @k_TH;
		case 'NO'
			K = @normpdf;
	end;
	epsilon = iP.Results.epsilon;
	U = iP.Results.U;
	conf_lev = iP.Results.conf_lev;
	p = iP.Results.p;
	
	h_grid1 = [iP.Results.h_grid_dr1; iP.Results.h_grid_dif1];	% first-stage grid
	h_grid2 = [iP.Results.h_grid_dr2; iP.Results.h_grid_dif2];	% second-stage grid
	criterion = iP.Results.criterion;
	V = zeros(2,2);
	V1tilde = zeros(2,2);
	
	% misc. initializations
	N = size(X,1); Delta = T/N; dr = 1;	dif = 2; stage1 = 1; stage2 = 2;
	ul = U(1); uu = U(2); h = zeros(2,2); cv = chi2inv(1-conf_lev, 1);
		
	% nested functions
	function handle = get_mu_single_handle(h)
		handle = @mu_single;
		function m = mu_single(x)	% single-smoothing estimate of the drift function
			kval = K((X-x)/h);
			m = 1/Delta* kval(1:N-1)'*diff(X) / sum(kval);
		end;
	end;
	
	function handle = get_sig_single_handle(h)
		handle = @sig_single;
		function s = sig_single(x)	% single-smoothing estimate of the diffusion function
			kval = K((X-x)/h);
			s = 1/Delta* kval(1:N-1)'*(diff(X).^2) / sum(kval);
		end;
	end;
	
	function handle = get_mu_JK_handle(h)
		handle = @mu_single;
		function m = mu_single(k)	% single-smoothing Jackknife estimate of the drift function
			if (k>1) && (k<N)
				X_k = [X(1:(k-1)); X((k+1):end)];
			elseif k==1
				X_k = X(2:end);
			else
				X_k = X(1:(N-1));
			end;
			kval = K((X_k-X(k))/h);
			m = 1/Delta* kval(1:(N-2))'*diff(X_k) / sum(kval);
		end;
	end;
	
	function handle = get_sig_JK_handle(h)
		handle = @sig_single;
		function s = sig_single(k)	% single-smoothing Jackknife estimate of the diffusion function
			if (k>1) && (k<N)
				X_k = [X(1:(k-1)); X((k+1):end)];
			elseif k==1
				X_k = X(2:end);
			else
				X_k = X(1:(N-1));
			end;
			kval = K((X_k-X(k))/h);
			s = 1/Delta* kval(1:(N-2))'*(diff(X_k).^2) / sum(kval);
		end;
	end;
	
	function handle = get_loc_time_handle(h)
		handle = @loc_time;
		function l = loc_time(a)	% estimate local time at point a
			l = Delta/h*sum(K((X-a)/h));
		end
	end;

	function m = mu(z,h)	% estimates mu for each location given in vector z and fixed h
		mu_handle = get_mu_single_handle(h);
		m = arrayfun(mu_handle, z);
	end;
	
	function s = sig(z,h) 	% estimates sigma for each location given in vector z and fixed h
		sig_handle = get_sig_single_handle(h);
		s = arrayfun(sig_handle, z);
	end;
	
	function m = mu_JK(h)	% Jackknife estimates mu for each location given in data vector X and fixed h
		mu_handle = get_mu_JK_handle(h);
		m = arrayfun(mu_handle, 1:N);
	end;
	
	function s = sig_JK(h) 	% Jackknife estimates sigma for each location given in data vector X and fixed h
		sig_handle = get_sig_JK_handle(h);
		s = arrayfun(sig_handle, 1:N);
	end;
	
	function r = res(h) 	% estimates the residuals for a particular h
		r = (diff(X) - mu(X(1:N-1),h(dr))*Delta)./sqrt((sig(X(1:N-1),h(dif))*Delta));
	end;
	
	function V1tilde = calc_V1(h, eta, bw_type)		% calculate V1
		ltime = get_loc_time_handle(h);
		int_loc_time = simpson(min(X), max(X), 50, ltime);
		
		switch bw_type
			case dr		% calculations for the drift case
				v1 = sqrt(exp( h^(5-epsilon)                          * int_loc_time      )) * eta(:,1);
				V1 = @(u) 2/sqrt(R)*sum((v1 <= u) - 1/2);
				V1tilde = simpson(ul, uu, 50, @(u) V1(u)^2*p(u));
			case dif	% calculations for the diffusion case
				v1 = sqrt(exp( h^(5-epsilon)/Delta                    * int_loc_time      )) * eta(:,1);
				V1 = @(u) 2/sqrt(R)*sum((v1 <= u) - 1/2);
				V1tilde = simpson(ul, uu, 50, @(u) V1(u)^2*p(u));
		end;
	end;
	
	function V2tilde = calc_V2(h, eta)		% calculate V2
		ltime = get_loc_time_handle(h);
		int_loc_time = simpson(min(X), max(X), 50, ltime);
		
		v2 = sqrt(exp((h^(1+epsilon)                          * int_loc_time)^(-1))) * eta(:,2);
		V2 = @(u) 2/sqrt(R)*sum((v2 <= u) - 1/2);
		V2tilde = simpson(ul, uu, 50, @(u) V2(u)^2*p(u));
	end;
	
	function V3tilde = calc_V3(h, eta, bw_type)		% calculate V3
		ltime = get_loc_time_handle(h);
		int_loc_time = simpson(min(X), max(X), 50, ltime);
		
		switch bw_type
			case dr		% calculations for the drift case
				v3 = sqrt(exp( sqrt(Delta*log(1/Delta))/h^(1+epsilon) * int_loc_time      )) * eta(:,3);
				V3 = @(u) 2/sqrt(R)*sum((v3 <= u) - 1/2);
				V3tilde = simpson(ul, uu, 50, @(u) V3(u)^2*p(u));
			case dif	% calculations for the diffusion case
				v3 = sqrt(exp( sqrt(Delta*log(1/Delta))/h^(1+epsilon) * int_loc_time      )) * eta(:,3);
				V3 = @(u) 2/sqrt(R)*sum((v3 <= u) - 1/2);
				V3tilde = simpson(ul, uu, 50, @(u) V3(u)^2*p(u));
		end;
	end;
	
	function [V,V1tilde] = calc_V(h, eta, bw_type)	% calculate the statistic V
				
		switch bw_type
			case dr		% calculations for the drift case
				V1tilde = calc_V1(h, eta, bw_type);
				V2tilde = calc_V2(h, eta);
				V3tilde = calc_V3(h, eta, bw_type);
	
				V = min(V1tilde,min(V2tilde,V3tilde));
			case dif	% calculations for the diffusion case
				V1tilde = calc_V1(h, eta, bw_type);
				V3tilde = calc_V3(h, eta, bw_type);
		
				V = min(V1tilde,V3tilde);
		end;	
	end;
	
	function handle = get_V1_handle(eta, bw_type)
		handle = @V1_tmp;
		function v1 = V1_tmp(h) 
			v1 = calc_V1(h, eta, bw_type);
		end
	end;
	
	function handle = get_V2_handle(eta)
		handle = @V2_tmp;
		function v2 = V2_tmp(h) 
			v2 = calc_V2(h, eta);
		end
	end;
	
	function handle = get_V3_handle(eta, bw_type)
		handle = @V3_tmp;
		function v3 = V3_tmp(h) 
			v3 = calc_V3(h, eta, bw_type);
		end
	end;
	
	function [bw, V] = second_stage(V1fn, V2fn, V3fn, direction, bw_type)		% construct 2nd stage bandwidth
		hgr = h_grid2(bw_type,:);
		V1gr = arrayfun(V1fn, hgr);
		V3gr = arrayfun(V3fn, hgr);
        switch bw_type
            case dr
                V2gr = arrayfun(V2fn, hgr);
                Vgr = (V1gr > cv) .* (V2gr > cv) .* (V3gr > cv);
            case dif
                Vgr = (V1gr > cv) .* (V3gr > cv);
        end;
        
		switch direction
			case 'smaller'
				ind = find(Vgr, 1, 'last');
			case 'larger'
				ind = find(Vgr, 1, 'first');
		end;
		if isempty(ind)==true
			ind2 = find(V1gr > cv, 1, 'last');
			if (isempty(ind2)==true) error('Could not find new proposal bw which results in rejecting H0!'); end;
			bw = hgr(ind2);
			V = Vgr(ind2);
		else
			bw = hgr(ind);
			V = Vgr(ind);
		end;		
	end;
	
	
	% ------------------------------------------------------------------------
	% Step 1: propose initial bandwidth
	% ------------------------------------------------------------------------
	
	disp(' '); disp('------ 1st stage ------');
	
	function qv = Q(h) 	% residual-based criterion function
		[f, z] = ecdf(res(h));
		qv = norm(f-normcdf(z,0,1), inf);
	end;
	
	function qv = Q_CV_dr(h)	% cross-validation criterion function (drift)
		muhat_JK = mu_JK(h)';
		qv = sum((diff(X)-muhat_JK(1:N-1)).^2);
	end;
	
	function qv = Q_CV_dif(h)	% cross-validation criterion function (diffusion)
		sigmahat_JK = sig_JK(h)';
		qv = sum((diff(X).^2-sigmahat_JK(1:N-1)).^2);
	end;
	
	% find optimal bw
	switch criterion
		case 'resid'
			% residual-based criterion
			disp('Computing the criterion function (residual-based) over the grid ...');
			Qval = zeros(length(h_grid1(dr,:)), length(h_grid1(dif,:)));
			it = 1;
			for i_dr=1:length(h_grid1(dr,:))
				for i_dif=1:length(h_grid1(dif,:))
					Qval(i_dr,i_dif) = Q([h_grid1(dr,i_dr) h_grid1(dif,i_dif)]);
					disp(strcat(int2str(it), '/', int2str(length(h_grid1(dr,:))*length(h_grid1(dif,:)))));
					it = it + 1;
				end;
			end;
			disp(' ');
				
			[C,I1] = min(Qval);
			[C,I2] = min(C);
			h(1,:) = [h_grid1(dr,I1(I2)) h_grid1(dif,I2)];
			
		case 'CV'
			% cross-validation
			disp('Computing the criterion function (cross-validation) over the grid ...');
			Qval_CV_dif = zeros(length(h_grid1(dif,:)),1);
			Qval_CV_dr = zeros(length(h_grid1(dr,:)),1);
			for i=1:length(h_grid1(dr,:))
				Qval_CV_dr(i) = Q_CV_dr(h_grid1(dr,i));
				disp(strcat(int2str(i), '/', int2str(length(h_grid1(dif,:)) + length(h_grid1(dr,:)))));
			end;
			for i=1:length(h_grid1(dif,:))
				Qval_CV_dif(i) = Q_CV_dif(h_grid1(dif,i));
				disp(strcat(int2str(length(h_grid1(dr,:)) + i), '/', int2str(length(h_grid1(dif,:)) + length(h_grid1(dr,:)))));
			end;
			disp(' ');
			
			[C,I1] = min(Qval_CV_dr);
			[C,I2] = min(Qval_CV_dif);
			h(1,:) = [h_grid1(dr,I1) h_grid1(dif,I2)];
	end;
	
	fprintf('Optimal 1st stage drift bw: %2.4f\n', h(stage1,dr));
	fprintf('Optimal 1st stage diffusion bw: %2.4f\n', h(stage1,dif));
	
	
	
	
	% ------------------------------------------------------------------------
	% Step 2: iterate to find bandwidth which satisfies the rate conditions
	% ------------------------------------------------------------------------
		
	disp(' '); disp('------ 2nd stage ------');
	disp('Testing the rate conditions ...'); disp(' ');
	
	for i=[dr dif]		
		% generate rv's for randomization of test statistic
		rv = randn(3*R,1);
		eta = [rv(1:R) rv((R+1):(2*R)) rv((2*R+1):3*R)];
		
		% calculate test statistic
		[V(stage1,i), V1tilde(stage1,i)] = calc_V(h(stage1,i), eta, i);
		
		
		% ------------------------------------------------------------------------
		% perform test of rate conditions
		% ------------------------------------------------------------------------
		
		if V(stage1,i) > cv	% (a) reject the null, i.e. accept the bandwidth
			h(stage2,i) = h(stage1,i);
			V(stage2,i) = V(stage1,i);
			
		else	% (b) do not reject the null, i.e. propose another bandwidth
			V1fn = get_V1_handle(eta, i);
			V2fn = get_V2_handle(eta);
			V3fn = get_V3_handle(eta, i);
			
			if V(stage1,i) == V1tilde(stage1,i)	% (b.1) propose smaller bw
				[h(stage2,i), V(stage2,i)] = second_stage(V1fn, V2fn, V3fn, 'smaller', i);
			
			else						% (b.2) propose larger bw
				[h(stage2,i), V(stage2,i)] = second_stage(V1fn, V2fn, V3fn, 'larger', i);
			end;
		end;
	end;
	
	fprintf('Optimal 2nd stage drift bw: %2.4f\n', h(stage2,dr));
	fprintf('Optimal 2nd stage diffusion bw: %2.4f\n', h(stage2,dif));
	
end


% ------------------------------------------------------------------------
% supplementary funcions
% ------------------------------------------------------------------------

function s = simpson(a, b, n, f)		% Simpson's Rule with n nodes
	if mod(n,2) > 0
		n = n + 1;
	end;
	h = (b-a)/n;
	x = a:h:b;
	fx = arrayfun(f, x(1:n+1))';
	coeff = [1 kron(ones(1,(n-2)/2),[4 2]) 4 1];
	s = h/3*coeff*fx;
end



% ------------------------------------------------------------------------
% kernel functions
% ------------------------------------------------------------------------

function k = k_TR(x)	% truncated kernel
	k = (abs(x)<=1);
end

function k = k_BT(x)	% Bartlett kernel
	k = (1-abs(x)).*(abs(x)<=1);
end

function k = k_PR(x)	% Parzen kernel
	k = (1-6*x.^2+6*abs(x).^3).*(abs(x)<=0.5).*(abs(x)>=0) + (2*(1-abs(x)).^3).*(abs(x)<=1).*(abs(x)>0.5);
end

function k = k_TH(x)	% Tukey-Hanning kernel
	k = (1+cos(pi*x))/2 .*(abs(x)<=1);
end

function k = k_QS(x)	% Quadratic spectral kernel
	k = 25*ones(size(x))./(12*pi^2*x.^2).*(sin(6*pi*x/5)./(6*pi*x/5)-cos(6*pi*x/5));
end