/* An example of using bootstrap sampling to estimate the MSE for several
   sample statistics (mean, standard deviation, coefficient of variation).
   This version uses macro arguments to make it easier to use for other
   data sets. */

options mprint ;

/* Example data. obs is the row number; bootid is a copy of it that serves
   as the key on which the bootstrap draws are merged back onto the data. */
data table811 ;
  input y @@ ;
  obs = _n_ ;
  bootid = obs ;
  title ' ' ;
  cards ;
7 11 15 16 20 22 24 25 29 33 34 37 41 42 49 57 66 71 84 90
;

proc print ;

/* calculate sample statistics */
proc means n mean stddev cv ;
run ;

/* Here information is provided using MACRO variables. */

/* list the data set and variable name */
%let data = table811 ;
%let var = y ;

/* list the sample size */
%let samplesz = 20 ;

/* set the bootstrap sample size (number of bootstrap replicates) */
%let bootsampsz = 2000 ;

/* list the observed mean, std, and cv
   (these are typed in by hand; the PROC MEANS step above prints the
   corresponding statistics for the example data) */
%let obsmean = 38.65 ;
%let obsstd = 24.02 ;
%let obscv = 62.15 ;

/* create the bootstrap sample of ids:
   for every replicate (iter) draw samplesz ids uniformly from 1..samplesz,
   i.e. sampling with replacement. ranuni(0) seeds from the clock, so the
   draws differ from run to run. */
data one ;
  do iter = 1 to &bootsampsz ;
    do samp = 1 to &samplesz ;
      bootid = ceil(&samplesz*ranuni(0)) ;
      output ;
    end ;
  end ;

proc print data = one (obs = &samplesz) ;
run ;

proc sort data = &data ;
  by bootid ;

proc sort data = one ;
  by bootid ;

/* merge bootstrap ids with the data, creating the bootstrap sample.
   Only rows that correspond to an actual draw are kept: without the IN=
   filter, a data row whose bootid was never drawn would come through with
   iter missing and form a spurious extra BY group in the steps below. */
data both ;
  merge &data one (in = _drawn) ;
  by bootid ;
  if _drawn ;

proc sort data = both ;
  by iter ;

proc print data = both (obs = &samplesz) ;
run ;

/* calculate the sample statistics from each bootstrap sample */
proc means data = both noprint ;
  var &var ;
  output out = meansout mean = mn&var stddev = std&var cv = cv&var ;
  by iter ;
run ;

proc print noobs data = meansout (obs = 30) ;
  var iter mn&var std&var cv&var ;
  title 'Bootstrap samples' ;
run ;

/* The HISTOGRAM statement below is deliberately left commented out, as in
   the original program; remove the leading asterisk to plot the bootstrap
   sampling distributions. */
proc univariate data= meansout noprint;
  *histogram mn&var std&var cv&var / cfill = blue cframe = ligr;
  title 'Bootstrap sampling distributions ' ;
run;

/* this output can be used to calculate bootstrap bias and variance */
proc means data = meansout ;
  var mn&var std&var cv&var ;
  output out = seinfo
    mean = mnmn&var mnstd&var mncv&var
    var = vmn&var vstd&var vcv&var
    std = smn&var sstd&var scv&var
    n = nmn&var nstd&var ncv&var ;
  title 'Bootstrap standard error calculations' ;

proc print data = seinfo ;
  var nmn&var nstd&var ncv&var smn&var sstd&var scv&var ;
run ;

/* this DATA step and Proc MEANS are used to calculate the bootstrap
   estimate of MSE and margin of error.
   Each replicate contributes (statistic - observed value)**2 / bootsampsz,
   so the SUM over replicates is the mean squared deviation. */
data msecalc ;
  set meansout ;
  mseimean = (mn&var - &obsmean)**2/&bootsampsz ;
  mseistd = (std&var - &obsstd)**2/&bootsampsz ;
  mseicv = (cv&var - &obscv)**2/&bootsampsz ;
  title 'Bootstrap MSE calculations' ;

proc means data = msecalc noprint ;
  var mseimean mseistd mseicv ;
  /* the OUTPUT statement must be terminated before RUN; otherwise RUN is
     read as one more name in the SUM= list */
  output out = finalcalc sum = msemean msestd msecv ;
run ;

/* margin of error = 2 * sqrt(MSE) for each statistic */
data finalcalc ;
  set finalcalc ;
  moemean = 2*sqrt(msemean) ;
  moestd = 2*sqrt(msestd) ;
  moecv = 2*sqrt(msecv) ;

proc print ;
  var msemean msestd msecv moemean moestd moecv ;
run ;