%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % % Calculate the optimal bandwidth for functional estimation of recurrent % processes as in Bandi et al. (2009) % % This version: Oct 6th, 2009 % % Authors: Federico M. Bandi, Valentina Corradi, Guillermo Moloche, Daniel Wilhelm. % % Disclaimer: This code has been debugged several times and performs satisfactorily. % However, the authors accept no responsibility for any remaining issues. The code % is solely provided to help potential users by facilitating empirical implementation % of some (not all) of the procedures in the paper "Bandwidth selection for % continuous-time Markov processes." % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % % How to call this function: % optbw(X, T) % optbw(X, T, R) % optbw(X, T, R, [options]) % % Required input arguments: % X: Nx1 vector of observations % T: time horizon % % % Optional input arguments: % R: Number of random variables generated for randomized % testing procedure. As defined in Bandi et al. (2009). % Default is 500. % % % Options: % h_grid_dr1: A vector containing the grid of bandwidths over which % the optimal 1st stage drift bw is determined. % Default is linspace(0.01, 5, 10). % h_grid_dif1: A vector containing the grid of bandwidths over which % the optimal 1st stage diffusion bw is determined. % Default is linspace(0.01, 5, 10). % h_grid_dr2: A vector containing the grid of bandwidths over which % the optimal 2nd stage drift bw is determined. % Default is linspace(0.01, 10, 100). % h_grid_dif2: A vector containing the grid of bandwidths over which % the optimal 2nd stage diffusion bw is determined. % Default is linspace(0.01, 10, 100). % kernel: One of the following strings, 'TR','BT','PR','QS','TH' % or 'NO' naming the kernel function to be used for non- % parametric estimation (truncated, Bartlett, Parzen, % Quadratic spectral, Tukey-Hanning or normal, resp.). % The normal kernel is default. % epsilon: Epsilon parameter defined in Bandi et al. (2009). % Default is 0.01. % U: 1x2 vector defining the lower and upper bound of the % interval U defined in Bandi et al. (2009). Default is % [-1.5 2.5]. % conf_lev: Confidence level of the test. Defaul is 0.05. % p: Weighting function pi:U->[0,inf) defined in Bandi et % al. (2009). Default is the standard normal pdf. % criterion: 1st stage criterion used to find optimal bandwidths. % 'resid' for the residual-based method by Bandi et al. % (2009) or 'CV' for cross-validation. Default is 'resid'. % % Outputs: % h: 2x2 matrix containing the optimal 1st stage (1st row) % and optimal 2nd stage (2nd row) bw's. The 1st column % contains the drift and the 2nd column the diffusion bw's. % V: 2x2 matrix containing the test statistic V calculated % using the 1st stage (1st row) and 2nd stage (2nd row) % bw's. The 1st column refers to the drift and the 2nd to % the diffusion. % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function [h, V] = optbw(X, T, varargin) % ------------------------------------------------------------------------ % initializations % ------------------------------------------------------------------------ % configure the inputParser iP = inputParser; iP.addRequired('X', @isnumeric); iP.addRequired('T', @isnumeric); iP.addOptional('R', 500, @isnumeric); iP.addParamValue('h_grid_dr1', linspace(0.01, 5, 10), @isnumeric); iP.addParamValue('h_grid_dif1', linspace(0.01, 5, 10), @isnumeric); iP.addParamValue('h_grid_dr2', linspace(0.01, 10, 100), @isnumeric); iP.addParamValue('h_grid_dif2', linspace(0.01, 10, 100), @isnumeric); iP.addParamValue('kernel', 'NO', @(x) any(strcmpi(x, {'TR','BT','PR','QS','TH'}))); iP.addParamValue('criterion', 'resid', @(x) any(strcmpi(x, {'resid','CV'}))); iP.addParamValue('epsilon', 0.01, @isnumeric); iP.addParamValue('U', [-1.5 2.5], @isnumeric); iP.addParamValue('conf_lev', 0.05, @isnumeric); iP.addParamValue('p', @normpdf); iP.parse(X, T, varargin{:}); R = iP.Results.R; switch iP.Results.kernel case 'TR' K = @k_TR; case 'BT' K = @k_BT; case 'PR' K = @k_PR; case 'QS' K = @k_QS; case 'TH' K = @k_TH; case 'NO' K = @normpdf; end; epsilon = iP.Results.epsilon; U = iP.Results.U; conf_lev = iP.Results.conf_lev; p = iP.Results.p; h_grid1 = [iP.Results.h_grid_dr1; iP.Results.h_grid_dif1]; % first-stage grid h_grid2 = [iP.Results.h_grid_dr2; iP.Results.h_grid_dif2]; % second-stage grid criterion = iP.Results.criterion; V = zeros(2,2); V1tilde = zeros(2,2); % misc. initializations N = size(X,1); Delta = T/N; dr = 1; dif = 2; stage1 = 1; stage2 = 2; ul = U(1); uu = U(2); h = zeros(2,2); cv = chi2inv(1-conf_lev, 1); % nested functions function handle = get_mu_single_handle(h) handle = @mu_single; function m = mu_single(x) % single-smoothing estimate of the drift function kval = K((X-x)/h); m = 1/Delta* kval(1:N-1)'*diff(X) / sum(kval); end; end; function handle = get_sig_single_handle(h) handle = @sig_single; function s = sig_single(x) % single-smoothing estimate of the diffusion function kval = K((X-x)/h); s = 1/Delta* kval(1:N-1)'*(diff(X).^2) / sum(kval); end; end; function handle = get_mu_JK_handle(h) handle = @mu_single; function m = mu_single(k) % single-smoothing Jackknife estimate of the drift function if (k>1) && (k1) && (k cv) .* (V2gr > cv) .* (V3gr > cv); case dif Vgr = (V1gr > cv) .* (V3gr > cv); end; switch direction case 'smaller' ind = find(Vgr, 1, 'last'); case 'larger' ind = find(Vgr, 1, 'first'); end; if isempty(ind)==true ind2 = find(V1gr > cv, 1, 'last'); if (isempty(ind2)==true) error('Could not find new proposal bw which results in rejecting H0!'); end; bw = hgr(ind2); V = Vgr(ind2); else bw = hgr(ind); V = Vgr(ind); end; end; % ------------------------------------------------------------------------ % Step 1: propose initial bandwidth % ------------------------------------------------------------------------ disp(' '); disp('------ 1st stage ------'); function qv = Q(h) % residual-based criterion function [f, z] = ecdf(res(h)); qv = norm(f-normcdf(z,0,1), inf); end; function qv = Q_CV_dr(h) % cross-validation criterion function (drift) muhat_JK = mu_JK(h)'; qv = sum((diff(X)-muhat_JK(1:N-1)).^2); end; function qv = Q_CV_dif(h) % cross-validation criterion function (diffusion) sigmahat_JK = sig_JK(h)'; qv = sum((diff(X).^2-sigmahat_JK(1:N-1)).^2); end; % find optimal bw switch criterion case 'resid' % residual-based criterion disp('Computing the criterion function (residual-based) over the grid ...'); Qval = zeros(length(h_grid1(dr,:)), length(h_grid1(dif,:))); it = 1; for i_dr=1:length(h_grid1(dr,:)) for i_dif=1:length(h_grid1(dif,:)) Qval(i_dr,i_dif) = Q([h_grid1(dr,i_dr) h_grid1(dif,i_dif)]); disp(strcat(int2str(it), '/', int2str(length(h_grid1(dr,:))*length(h_grid1(dif,:))))); it = it + 1; end; end; disp(' '); [C,I1] = min(Qval); [C,I2] = min(C); h(1,:) = [h_grid1(dr,I1(I2)) h_grid1(dif,I2)]; case 'CV' % cross-validation disp('Computing the criterion function (cross-validation) over the grid ...'); Qval_CV_dif = zeros(length(h_grid1(dif,:)),1); Qval_CV_dr = zeros(length(h_grid1(dr,:)),1); for i=1:length(h_grid1(dr,:)) Qval_CV_dr(i) = Q_CV_dr(h_grid1(dr,i)); disp(strcat(int2str(i), '/', int2str(length(h_grid1(dif,:)) + length(h_grid1(dr,:))))); end; for i=1:length(h_grid1(dif,:)) Qval_CV_dif(i) = Q_CV_dif(h_grid1(dif,i)); disp(strcat(int2str(length(h_grid1(dr,:)) + i), '/', int2str(length(h_grid1(dif,:)) + length(h_grid1(dr,:))))); end; disp(' '); [C,I1] = min(Qval_CV_dr); [C,I2] = min(Qval_CV_dif); h(1,:) = [h_grid1(dr,I1) h_grid1(dif,I2)]; end; fprintf('Optimal 1st stage drift bw: %2.4f\n', h(stage1,dr)); fprintf('Optimal 1st stage diffusion bw: %2.4f\n', h(stage1,dif)); % ------------------------------------------------------------------------ % Step 2: iterate to find bandwidth which satisfies the rate conditions % ------------------------------------------------------------------------ disp(' '); disp('------ 2nd stage ------'); disp('Testing the rate conditions ...'); disp(' '); for i=[dr dif] % generate rv's for randomization of test statistic rv = randn(3*R,1); eta = [rv(1:R) rv((R+1):(2*R)) rv((2*R+1):3*R)]; % calculate test statistic [V(stage1,i), V1tilde(stage1,i)] = calc_V(h(stage1,i), eta, i); % ------------------------------------------------------------------------ % perform test of rate conditions % ------------------------------------------------------------------------ if V(stage1,i) > cv % (a) reject the null, i.e. accept the bandwidth h(stage2,i) = h(stage1,i); V(stage2,i) = V(stage1,i); else % (b) do not reject the null, i.e. propose another bandwidth V1fn = get_V1_handle(eta, i); V2fn = get_V2_handle(eta); V3fn = get_V3_handle(eta, i); if V(stage1,i) == V1tilde(stage1,i) % (b.1) propose smaller bw [h(stage2,i), V(stage2,i)] = second_stage(V1fn, V2fn, V3fn, 'smaller', i); else % (b.2) propose larger bw [h(stage2,i), V(stage2,i)] = second_stage(V1fn, V2fn, V3fn, 'larger', i); end; end; end; fprintf('Optimal 2nd stage drift bw: %2.4f\n', h(stage2,dr)); fprintf('Optimal 2nd stage diffusion bw: %2.4f\n', h(stage2,dif)); end % ------------------------------------------------------------------------ % supplementary funcions % ------------------------------------------------------------------------ function s = simpson(a, b, n, f) % Simpson's Rule with n nodes if mod(n,2) > 0 n = n + 1; end; h = (b-a)/n; x = a:h:b; fx = arrayfun(f, x(1:n+1))'; coeff = [1 kron(ones(1,(n-2)/2),[4 2]) 4 1]; s = h/3*coeff*fx; end % ------------------------------------------------------------------------ % kernel functions % ------------------------------------------------------------------------ function k = k_TR(x) % truncated kernel k = (abs(x)<=1); end function k = k_BT(x) % Bartlett kernel k = (1-abs(x)).*(abs(x)<=1); end function k = k_PR(x) % Parzen kernel k = (1-6*x.^2+6*abs(x).^3).*(abs(x)<=0.5).*(abs(x)>=0) + (2*(1-abs(x)).^3).*(abs(x)<=1).*(abs(x)>0.5); end function k = k_TH(x) % Tukey-Hanning kernel k = (1+cos(pi*x))/2 .*(abs(x)<=1); end function k = k_QS(x) % Quadratic spectral kernel k = 25*ones(size(x))./(12*pi^2*x.^2).*(sin(6*pi*x/5)./(6*pi*x/5)-cos(6*pi*x/5)); end