PROC PRINT DATA=simp NOOBS SPLIT="*"; WHERE &outpx GE 1; TITLE2 "Table P1a. Population Simulation Parameters";

Size: px

Start display at page:

Download "PROC PRINT DATA=simp NOOBS SPLIT="*"; WHERE &outpx GE 1; TITLE2 "Table P1a. Population Simulation Parameters";"

Hortense Houston
5 years ago
Views:

/*==========================================================================
  Simulation Program for Balanced Two Stage Cluster Sampling:
  Predictors of Realized Random Cluster Means                    Ed Stanek

  This listing was reconstructed from a page-broken text extraction.
  Relative to that extraction:
    - page numbers that had leaked between statements were removed;
    - banner comments are terminated with a semicolon so they no longer
      swallow the statement that follows them;
    - the END statements closing every DO loop / DO group were restored;
    - the truncated "%M" was restored to %MEND simc;
    - a wrapped comment tail that had become a live "error;" statement
      was folded back into its comment.
  Other changes are flagged where they occur.
  NOTE(review): END placement was inferred from the comments and the
  statement order; confirm against the original ced03p30.sas.
==========================================================================*/
OPTIONS LINESIZE=140 PAGESIZE=53 NOCENTER NODATE NONUMBER NOFMTERR;
*******;
* PROJECT NAME: JASA Paper on 2-stage Balanced Cluster Sampling ;
* PROGRAM NAME DATE PROGRAMMER ;
TITLE1 "Source:ced03p30.sas C:\projects\cluster\programs 5/12/03 EJS " ;
* Description: Simulation macro to evaluate predictors of a realized ;
*   cluster mean from two stage cluster sampling. ;
*   with changes to correct expressions for the MSE when there is ;
*   response error and improved efficiency for summarizing results ;
* ;
* ;
* INPUT: none ;
*******;
LIBNAME new 'J:\projects\cluster\data';
*LIBNAME home v8 'C:\Documents and Settings\Administrator\My Documents\ed\projects\cluster\data';

/*--------------------------------------------------------------------------
  %simc : simulate a finite two-stage population, draw repeated two-stage
          samples, and compare nine predictors of the realized cluster mean.

  Parameters (all positional):
    bn      number of clusters in the population (N)
    bm      number of units in each cluster (M)
    mu_p    population mean
    popv    simulated variance between cluster means
    popcv   simulated variance between units within a cluster
    trialn  number of simulated samples (trials)
    ns      number of clusters in a sample (n)
    ms      number of units sampled per selected cluster (m)
    nms     number of units in a sample; must equal ns*ms, because it sizes
            the RETAIN list sp1-sp&nms for the ns-by-ms array sp
    vbb     response error variance added to each sampled unit value
    chk     number of times the units of the first selected cluster are
            permuted (1 unless checking the permutation code)
    outpx   population print control     (0=none, 1=limited, 2=a lot)
    outpxc  cluster selection print control
    outpxu  unit selection print control
    oute    predictor development print control

  Work data sets written: simp p1 p1a p2 u1 p2b p3 p3a p0 c1 c2 samp1 samp2
  stest e1 e2 e3, plus format typf.

  The optional listings reference t1-t4, si1-si3 and j1-j4 by name, and the
  oute>=2 PUT statements reference sp{3,3} and yibx{3}; those names are
  hard-coded, so the macro assumes bm>=4, ns>=3 and ms>=4.
--------------------------------------------------------------------------*/
%MACRO simc(bn,bm,mu_p,popv,popcv,trialn,ns,ms,nms,vbb,chk,outpx,outpxc,outpxu,oute);

  *****************************************;
  *** Generate Population Simulation Parameters;
  *****************************************;
  DATA simp (LABEL="Simulation Parameters");
    bn=&bn;        * Number of Clusters in the Population;
    bm=&bm;        * Number of Days for a Subject;
    mu_p=&mu_p;    * Population mean;
    popv=&popv;    * Simulated Variance of Cluster means;
    popcv=&popcv;  * Simulated Average Variance of days within clusters;
    LABEL bn="# Pop*Cluster:*BN"
          bm="# Pop*Units:*BM"
          mu_p="simulated*pop*mean:*mu_p"
          popv="simulated*cluster*var:*popv"
          popcv="simulated*unit*var:*popcv";
  RUN;

  /* WHERE on a macro constant: prints everything or nothing. */
  PROC PRINT DATA=simp NOOBS SPLIT="*";
    WHERE &outpx GE 1;
    TITLE2 "Table P1a. Population Simulation Parameters";
  RUN;

  *************************************************;
  *** Generate the Population of clusters         *;
  *************************************************;
  DATA p1 (KEEP=s mu_s);
    ARRAY ys{&bn};
    ssum=0;                          * Variable to construct Pop mean;
    DO s=1 to &bn;                   * Generate cluster mean;
      ys{s}=round(&mu_p+sqrt(&popv)*rannor(234311),0.01);  *Cluster average;
      ssum=ssum+ys{s};
    END;
    ***************************************************;
    *** Re-center cluster parameters to pop mean       ;
    ***************************************************;
    scr_mu=ssum/&bn-&mu_p;
    DO s=1 to &bn;                   /* one output record per cluster */
      ys{s}=ys{s}-scr_mu;
      mu_s=ys{s};
      OUTPUT;
    END;
  RUN;

  PROC MEANS DATA=p1 N MEAN STD NOPRINT;
    VAR mu_s ;
    OUTPUT OUT=p1a VAR(mu_s)=v_musx;
  RUN;

  /* Spread the single variance value over every cluster record. */
  DATA p2 (DROP=v_musx);
    RETAIN v_mus;
    MERGE p1 p1a (KEEP=v_musx);
    IF _n_=1 then v_mus=v_musx;
    match=1;
    LABEL v_mus="cluster*var:*v_mus"
          s="cluster*label:*s"
          mu_s="cluster*parameter:*mu_s";
  RUN;

  **********************;
  *** Generate Unit effects within clusters. These are centered at zero. There is one set ;
  *** of unit effects, where eventually, cluster means are added to the unit effects to ;
  *** form cluster-unit parameters ;
  **********************;
  DATA u1 (DROP=ssumc scr_muc sqsum u );
    FILE PRINT;
    ***************************************************************;
    *** Generate Array of deviations of cluster-unit parameters    ;
    ***************************************************************;
    *** Define arrays for deviations of cluster-unit parameters    ;
    ***************************************************************;
    ARRAY t{&bm} ;                   *Array of Unit parameters;
    ssumc=0;                         * Variable to construct cluster mean;
    DO u=1 to &bm;                   *Generate Unit values for a cluster;
      t{u}=round(0+sqrt(&popcv)*rannor(124311),0.001);  *Unit parameters;
      ssumc=ssumc+t{u};
    END;
    ***************************************************;
    *** Re-center unit parameters to zero for a cluster;
    ***************************************************;
    scr_muc=ssumc/&bm;               * Initial average of unit deviations;
    sqsum=0;                         * Variable of squared values to construct cluster var ;
    DO u=1 to &bm;
      t{u}=t{u}-scr_muc;
      sqsum=sqsum+t{u}*t{u};
    END;
    v_s=(sqsum)/(&bm-1);
    match=1;                         *Variable to combine back with cluster means;
  RUN;

  /* u1 holds a single record, so this attaches the same unit effects to every cluster. */
  DATA p2b (DROP=match );
    MERGE p2 u1;
    BY match;
  RUN;

  DATA p3 (DROP=u);
    SET p2b;
    FILE PRINT;
    ARRAY t{&bm};                    *Cluster-unit parameters;
    DO u=1 to &bm;
      t{u}=mu_s+t{u};                *Cluster-unit parameter;
    END;
    bn=&bn;
    bm=&bm;
    mu_p=&mu_p;
    popv=&popv;
    popcv=&popcv;
    /* mu_p is labelled twice below, exactly as in the source listing. */
    LABEL mu_p="simulated*pop*mean:*mu_p"
          popv="simulated*cluster*var:*popv"
          popcv="simulated*unit*var:*popcv"
          bn="# Pop*Cluster:*BN"
          bm="# Pop*Units:*BM"
          mu_p="pop*mean:*mu_p"
          v_s="parameter*unit*var:*v_s";
  RUN;

  *************************************************;
  *** Obtain average of unit variance over clusters;
  *************************************************;
  PROC MEANS NOPRINT DATA=p3;
    VAR v_s;
    OUTPUT OUT=p3a MEAN=v_ex;
  RUN;

  DATA p0 (DROP=v_ex);
    RETAIN v_e;
    MERGE p3 p3a(keep=v_ex);
    IF _N_=1 THEN v_e=v_ex;
    LABEL v_e="pop*ave*unit*var:*v_e";
  RUN;

  *****;
  *** Print results to summarize population simulation ;
  *****;
  * PROC CONTENTS DATA=p0;
  * WHERE &outpx GE 1;
  * TITLE2 "Table P2a. Contents of data set for simulated population";
  PROC PRINT DATA=p0 (OBS=25) NOOBS SPLIT="*";
    WHERE &outpx GE 1;
    VAR s bn bm mu_p v_mus v_s v_e mu_s t1-t4 ;
    FORMAT mu_p v_mus v_s v_e mu_s t1-t4 5.1;
    TITLE2 "Table P2b. List of first 25 clusters in simulated Population";
    TITLE3 " for the first four units in a cluster";
  RUN;

  PROC MEANS DATA=p0;
    WHERE &outpx GE 1;
    TITLE2 "Table P2c. Simple averages of population variables";
  RUN;

  *************************************************************;
  *** Create module to randomly select two stage cluster sample;
  *** construct, and evaluate predictors                       ;
  *************************************************************;
  *********;
  *** Select a SRS without replacement of Clusters *;
  *********;
  /* c1: one record per trial holding the sampled labels si1-si&ns.
     c2: one record per trial and sample position (trial, i, s). */
  DATA c1 (DROP=u s i p select rn rem_p rem_s )
       c2 (KEEP=trial i s outpxc);
    trialn=&trialn;
    bn=&bn;
    ns=&ns;
    outpxc=&outpxc;
    LABEL bn="simulated*# Clusters:*BN"
          ns="sample*# Clusters:*NS";
    ***********************************************************;
    *** Define number of simulations of samples to select     *;
    ***********************************************************;
    ARRAY sx{&bn} _TEMPORARY_ ;      * Array of cluster labels;
    ARRAY si{&ns};                   * Array of cluster labels in the sample;
    DO trial=1 to &trialn;
      **************************************;
      *** Set initial conditions for a trial;
      **************************************;
      DO u=1 to &bn;                 * Create labels for clusters ;
        sx{u}=u;                     * Assign value to labeled cluster;
      END;
      rem_p=&bn;   * Initialize # of remaining clusters not selected in the population;
      rem_s=&ns;   * Initialize # of clusters remaining to be selected in the sample;
      ****************************************;
      *** Select a cluster for position i in  ;
      *** the sample                          ;
      ****************************************;
      /* The stop value of an iterative DO is fixed on entry, so decrementing
         rem_s inside the loop does not shorten it. */
      DO i=1 to rem_s;
        select=0;                    * Indicator for selection of earlier cluster in listing;
        rn=ranuni(3362);             * Pick a random number;
        ********************************************;
        *** Break the 0-1 interval into equal size *;
        *** segments with one segment for each     *;
        *** remaining cluster not yet selected     *;
        ********************************************;
        DO p=1 to rem_p;             * For each remaining cluster in the population;
          IF (rn GE (p-1)/rem_p) AND (rn LT (p)/rem_p) AND select=0 THEN DO;
            si{i}=sx{p};             * Put the label for the selected cluster in the sample array;
            select=1;                * Set the indicator of having a selected cluster to 1;
            ********;
            *** Output directly to strung out data set that can be directly linked to ;
            *** the population ;
            ********;
            s=si{i};
            OUTPUT c2;
          END;
          /* After the hit, shift the remaining labels down one slot. */
          IF select=1 AND p LT &bn THEN sx{p}=sx{p+1};  * Increment the array of un-selected cluster labels;
        END;
        sx{rem_p}=.;                 * Re-set remaining array value to missing;
        rem_p=rem_p-1;               * Re-set the number of cluster remaining to be selected in the population;
        rem_s=rem_s-1;               * Re-set the number of cluster remaining to be selected in the sample;
      END;
      OUTPUT c1;
    END;
  RUN;

  ************************************************;
  *** Optionally print some descriptive results  *;
  ************************************************;
  PROC PRINT DATA=c1 (OBS=20) NOOBS;
    WHERE &outpxc=2;
    TITLE2 "Table C2-1. List of sampled Clusters";
  RUN;

  PROC PRINT DATA=c2 (OBS=20) NOOBS;
    WHERE &outpxc=2;
    TITLE2 "Table C2-2. List of sampled clusters strung out with one record per position per trial";
  RUN;

  PROC MEANS DATA=c1 ;
    WHERE &outpxc=2;
    TITLE2 "Table C2-3. Average Cluster Number for Positions in Sample";
  RUN;

  PROC FREQ DATA=c1;
    WHERE &outpxc=2;
    TABLES si1-si3;
    TITLE2 "Table C2-4. Frequency of cluster at a position";
  RUN;

  *** Combine list of selected clusters with population data ;
  PROC SORT DATA=c2;                 * Arrange selected clusters by cluster number;
    BY s;
  RUN;

  DATA samp1;                        * Merge selected clusters with population data;
    MERGE p0 c2 (IN=a);
    BY s;
    IF a;
    LABEL i="position*in*sample:*i"
          trial="trial*number:*trial";
  RUN;

  PROC SORT DATA=samp1;
    BY trial i;
  RUN;

  **********************************************************;
  *** Optional output                                     **;
  **********************************************************;
  PROC PRINT DATA=samp1 (OBS=25) NOOBS;
    WHERE &outpxu GE 1;
    TITLE2 "Table S1-1. List of sampled clusters for first 25 observations";
  RUN;

  *** Permute the units within clusters *;
  /* samp2: sampled unit values j1-j&ms for each selected cluster.
     stest: the replicated permutations of the first record, for checking. */
  DATA samp2 (KEEP=trial s i bn bm ms mu_p mu_s v_mus v_s v_e vbb outpxu j1-j&ms)
       stest (KEEP=outpxu chk trial s tu j1-j&ms);
    SET samp1;
    BY trial;
    bm=&bm;          * Total number of units in a cluster;
    ms=&ms;          * Number of units in a sampled cluster;
    vbb=&vbb;        * Response error variance for cluster-unit;
    chk=&chk;        * Number of times to permute units in the first selected cluster;
                     * as a test of the permutation program ;
    outpxu=&outpxu;  * Indicator for optional output;
    LABEL bm="# SSUs/PSU*in Pop:*BM"
          vbb="response*error*var:*vbb"
          ms="# SSU in Sample:*MS"
          chk="replications*of*unit*permuts:*chk";
    ***********************************************************;
    *** Define number of simulations of samples to select     *;
    ***********************************************************;
    FILE PRINT;
    **********************************************************;
    *** Permute units in a cluster for a trial               *;
    **********************************************************;
    ARRAY t{&bm};      * Array of population unit values for cluster;
    ARRAY j{&ms};      * Array of unit-parameter values for positions in sample;
    ARRAY chkt{&bm};   * Array for use in checking permutations of units;
    DO ck=1 to &bm;    * Set initial values of chk array to unit values;
      chkt{ck}=t{ck};
    END;
    IF _N_=1 THEN chk=&chk;  * For the first selected cluster, reset the replications of permuts;
    ELSE chk=1;
    DO tu=1 to chk;    * Replicate the permutations for checking;
      *****************************************************************;
      *** Re-set original array of population unit values for a cluster;
      *** We do this to check the results for a particular cluster     ;
      *****************************************************************;
      DO ck=1 to &bm;
        t{ck}=chkt{ck};
      END;
      ***************************************************************;
      *** Set initial conditions for permuting the units in a cluster;
      ***************************************************************;
      rem_pc=&bm;      * Initialize # of remaining units not selected in the cluster;
      rem_sc=&ms;      * Initialize # of units remaining to be selected in the cluster;
      ****************************************;
      *** Select a unit for position j in     ;
      *** the cluster                         ;
      ****************************************;
      DO unit=1 to rem_sc;
        selectc=0;               * Indicator for selection of earlier cluster in listing;
        rn=ranuni(3362);         * Pick a random number;
        ********************************************;
        *** Break the 0-1 interval into equal size *;
        *** segments with one segment for each     *;
        *** remaining unit not yet selected        *;
        ********************************************;
        DO p=1 to rem_pc;        * For each remaining unit in the cluster;
          IF (rn GE (p-1)/rem_pc) AND (rn LT (p)/rem_pc) AND selectc=0 THEN DO;
            j{unit}=t{p}+sqrt(&vbb)*rannor(2341);  * Put the unit-value in the sample position;
            selectc=1;           * Set the indicator of having selected a unit to 1;
          END;
          IF selectc=1 AND p LT &bm THEN t{p}=t{p+1};  * Increment the array of un-selected unit values;
        END;
        t{rem_pc}=.;             * Re-set remaining array value to missing;
        rem_pc=rem_pc-1;         * Re-set the number of units remaining to be selected in the cluster;
        rem_sc=rem_sc-1;         * Re-set the number of units remaining to be selected in the sample;
      END;
      IF _N_=1 THEN OUTPUT stest;  *Output to the test data set for checking permutations;
    END;
    /* NOTE(review): OUTPUT samp2 is placed after the replication loop so each
       selected cluster contributes one record; with chk=1 this is identical
       to writing it inside the loop. Confirm against the original program. */
    OUTPUT samp2;                * Output to sample data set;
  RUN;

  PROC PRINT DATA=stest (OBS=25);
    WHERE &outpxu GE 2;
    VAR trial s tu j1-j4;
    TITLE2 "Table U2-1. Sample Test of Unit permutations";
  RUN;

  PROC MEANS DATA=stest MEAN STDERR N;
    WHERE &outpxu GE 2;
    VAR j1-j4;
    TITLE2 "Table U2-2. Average of Values of positions for permuted units in the first selected Cluster";
  RUN;

  *** Construct Predictors and MSE ***;
  PROC SORT DATA=samp2;
    BY TRIAL;
  RUN;

  /* e2 receives one record, written at the last input record, holding the
     simulated (sqd) and theoretical (tmse) MSE values. e1 is declared but
     its OUTPUT statement is commented out, so it stays empty. */
  DATA e1 (KEEP=oute trial i s mu_s p1-p9 smse1-smse9)
       e2 (keep=trial tmse1-tmse9 sqd1-sqd9) ;
    SET samp2 END=lastrec;   * Create variable to compute summary statistics at last record;
    BY trial;
    FILE PRINT;
    ARRAY yj{&ms} j1-j&ms;   * Values for a sampled cluster;
    ARRAY sp{&ns,&ms};       * Values for 2-stage cluster sample;
    ARRAY slabel{&ns};       * Labels for selected units;
    ARRAY mus{&ns};          * True Parameter for selected units;
    ARRAY yibx{&ns};         * Average of values for sample units in sample cluster;
    ARRAY wib{&ns};          * Array of weights for sampled cluster units;
    ARRAY yibw{&ns};         * Array of weighted average values for sample units in sample cluster;
    ARRAY sqd{9};            * Sum of squared deviations of predictors from true mean;
    oute=&oute;
    nms=&nms;
    *PUT &nms= nms=;
    *IF &nms NE &ns*&ms THEN PUT "Number of Sample Units=" &nms ;
    *IF &nms NE &ns*&ms THEN PUT " Does NOT Match n*m=" &ns " * " &ms ;
    *****************;
    *** Initialize sum of squared deviations of predictors about true parameter to zero;
    *****************;
    IF _N_=1 THEN DO e=1 to 9;
      sqd{e}=0;
    END;
    *****;
    *** Compute various values needed to evaluate trial averages ;
    *** We first do this for clusters values without response error ;
    *** We then repeat it for cluster values with response error ;
    *****;
    ***********************************************************;
    *** Create an array of data for a trial                   *;
    ***********************************************************;
    RETAIN sp1-sp&nms;          *Array of sample data ;
    RETAIN yibx1-yibx&ns;       *Array of sample means of cluster units;
    RETAIN wib1-wib&ns;         *Array of weights for sampled cluster units;
    RETAIN yibw1-yibw&ns;       * Array of weighted average values for sample units in sample cluster;
    RETAIN slabel1-slabel&ns;   * Labels for selected clusters;
    RETAIN mus1-mus&ns;         * Parameter mean for selected clusters;
    RETAIN sqd1-sqd9;           * Sum of the squared deviations of predictors;
    * RETAIN msb mse;           * Sums to determine MSE and MSB to estimate shrinkage constants;

    /* Declared here rather than inside the predictor loop; ARRAY is a
       compile-time declaration, so only its position in the step moves. */
    ARRAY a{9};      * Coefficient that multiplies overall mean;
    ARRAY b{9};      * Coefficient that multiplies cluster mean;
    ARRAY p{9};      * Predictor of realized Cluster Mean;
    ARRAY tmse{9};   * Theoretical Value of MSE ;
    ARRAY smse{9};   * Squared deviation of predictor from true mean;

    ***********************************************;
    *** Initialize cluster sample values to missing;
    ***********************************************;
    IF FIRST.trial EQ 1 THEN DO;
      DO ix=1 to &ns;
        yibx{ix}=.;      *Initialize cluster sample averages;
        wib{ix}=.;       *Initialize weights for cluster averages to missing;
        yibw{ix}=.;      *Initialize weighted cluster averages to missing;
        slabel{ix}=.;    *Initialize selected unit labels to missing;
        mus{ix}=.;       *Initialize selected unit parameters to missing;
        DO jx=1 to &ms;
          SP{ix,jx}=.;   *Initialize cluster sample values to missing;
        END;
      END;
      * msb=0;           *Initialize ms(between clusters) to missing for trial;
      * mse=0;           *Initialize ms(within cluster) to missing for trial;
    END;

    ******************************************************;
    *** Read in sample data into array of trial values.  *;
    *** Evaluate inverse variance weight for WLS mean    *;
    ******************************************************;
    vi=v_s+vbb;   * Variance of units in cluster and response error in cluster;
                  * Note: this is constant for all clusters ;
    DO ix=1 to &ns;
      tm=0;
      DO jx=1 to &ms;
        IF ix=i THEN sp{ix, jx}=yj{jx};   * Set values for selected cluster-units;
        tm=tm+sp{ix, jx};                 * Compute total of response for sampled units in cluster;
      END;
      yibx{ix}=tm/&ms;                    * Evaluate simple cluster mean ;
      /* The listing accumulated "tvi=tiv+vi" with tiv fixed at 0, so tvi was
         always vi. The dead accumulator is dropped; the value is unchanged. */
      wib{ix}=1/(v_mus+vi/&ms);           * Quantity to get weight for WLS predictor for Mixed and SS;
                                          * Note: this is constant for all clusters ;
      yibw{ix}=yibx{ix}*wib{ix};          * Evaluate weighted cluster mean;
      IF ix=i THEN DO;
        slabel{ix}=s;
        mus{ix}=mu_s;
      END;
    END;

    **************************;
    * Optional Level 2 Output;
    **************************;
    IF &oute GE 2 THEN DO;
      PUT trial= i= sp{1,1}= sp{1,2}= sp{1,3}= /@10 yibx{1}= yibx{2}= yibx{3}= wib{1}= wib{2}= ;
      PUT trial= i= sp{2,1}= sp{2,2}= sp{2,3}= /@10 yibx{1}= yibx{2}= yibx{3}= wib{1}= wib{2}= ;
      PUT trial= i= sp{3,1}= sp{3,2}= sp{3,3}= /@10 yibx{1}= yibx{2}= yibx{3}= wib{1}= wib{2}= /;
    END;
    IF &oute GE 2 THEN DO;
      PUT trial= yibx{1}= yibx{2}= 'Mean of clusters';
      PUT trial= wib{1}= wib{2}= 'inverse variance wt'//;
    END;
    IF &oute GE 2 THEN DO;
      PUT trial= slabel{1}= slabel{2}= 'Label of selected cluster';
      PUT trial= mus{1}= mus{2}= 'Parameter for mean of selected cluster'//;
    END;

    *******************************************************;
    *** Compute MSE terms for a trial                     *;
    *******************************************************;
    IF LAST.trial THEN DO;   * Set the values in an array for subsequent processing;
      IF &oute GE 1 THEN PUT "For Each Trial";
      *******************************************;
      *** Display trial results for checking   **;
      *******************************************;
      IF &oute GE 1 THEN DO;
        IF trial LE 5 THEN PUT trial= (yibx1-yibx&ns) (+2 5.2) ' Mean of clusters';
        IF trial LE 5 THEN PUT trial= (wib1-wib&ns) (+2 5.2) ' Inverse Var of Cluster mean'/;
        IF trial LE 5 THEN PUT trial= (slabel1-slabel&ns) (+2 5.0) ' Label of selected clusters'/;
        IF trial LE 5 THEN PUT trial= (mus1-mus&ns) (+2 5.2) ' Parameter mean of selected cluster'//;
      END;
      *******************************************************;
      *** FOR EACH TRIAL                                   **;
      *** Construct:                                       **;
      ***   simple mean of cluster means                   **;
      ***   weighted cluster mean                          **;
      *******************************************************;
      ybb=mean(of yibx{*});                         * Simple mean of cluster means;
      muwhat=mean(of yibw{*})/mean(of wib{*});      * Weighted mean of cluster means;
      IF &oute GE 1 THEN PUT ybb= ' Mean of cluster means'/ muwhat= ' Weighted cluster mean';
      *********************************************************;
      *** Construct estimates of shrinkage constants          *;
      *********************************************************;
      msb=0;
      mse=0;
      DO ixx=1 to &ns;
        DO jx=1 to &ms;
          mse=mse+(sp{ixx,jx}-yibx{ixx})**2;
          IF &oute GE 2 THEN PUT "mse detail" trial= mse= sp{ixx,jx}= ixx= jx=;
        END;
        msb=msb+(yibx{ixx}-ybb)**2;
      END;
      mse=mse/(&ns*(&ms-1));   *Mean squared error of unit deviations within clusters;
      /* NOTE(review): the listing computed msb as the plain variance of the
         cluster means, but the estimators below use (msb-mse)/&ms, which
         treats msb as the ANOVA between-cluster mean square (expected value
         = within variance + m * between variance). The factor &ms is restored
         here so the two agree. This changes the estimated shrinkage
         constants (predictors 6-9); confirm against the original program. */
      msb=&ms*msb/(&ns-1);     *Between-cluster mean square of cluster means about overall mean;
      IF &oute GE 2 THEN PUT trial= mse= msb= "TRIAL AND MSB";

      *****************************************************;
      *** For each selected cluster in each trial         *;
      *** compute predictors                              *;
      *****************************************************;
      DO ix=1 to &ns;
        *****;
        *** Predictors *;
        *****;
        * Define unit sampling fraction ;
        f=&ms/&bm;
        * Define true shrinkage constants ;
        vstar=v_mus-v_e/&bm;                            * V star for use in RP model;
        ki=&ms*v_mus/(&ms*v_mus+vi);                    * Mixed Model true shrinkage constant;
        k=&ms*vstar/(&ms*vstar+v_e);                    * RP model true shrinkage constant;
        kstar=&ms*vstar/(&ms*vstar+v_e+vbb);            * RP model k* shrinkage constant denom resp error;
        krstar=(&ms*vstar+v_e)/(&ms*vstar+v_e+vbb);     * RP model k* shrinkage constant num unit var den resp error;
        *** Estimate variance components ;
        vbb_e=&vbb;                                     * Externally assumed known response error variance;
        v_ee=max(mse-vbb_e,0);                          * Estimated average var of units for a cluster;
        vi_e=mse;                                       * Estimated variance of selected cluster and resp error;
        v_muse=max( (msb-mse+&ms*v_ee/&bm)/&ms,0);      * Estimated variance between clusters;
        vstar_e=max( (msb-mse)/&ms,0);                  * Estimated adjusted variance of selected cluster for RP model;
        * Define estimated shrinkage constant ;
        ki_e=&ms*v_muse/(&ms*v_muse+vi_e);              * Mixed Model estimated shrinkage constant;
        k_e=&ms*vstar_e/(&ms*vstar_e+v_ee);             * RP model estimated shrinkage constant;
        kstar_e=&ms*vstar_e/(&ms*vstar_e+v_ee+vbb_e);   * RP model k* estimated shrinkage constant denom resp error;
        krstar_e=(&ms*vstar_e+v_ee)/(&ms*vstar_e+v_ee+vbb_e);  * RP model k* shrinkage constant num unit var den resp error;
        ********************************************;
        *** Optional checking output               *;
        ********************************************;
        /* The listing printed kstar_e twice; the second is now krstar_e. */
        IF &oute GE 2 AND TRIAL le 5 THEN
          PUT mse= msb= ki_e= k_e= kstar_e= krstar_e= "Estimated Shrinkage Constants";
        **************************************************************;
        *** Define array values for a and b to form predictors and MSE;
        *** See Table 2 of c03ed11.doc for the definitions            ;
        *** The order of the predictors is as follows                 ;
        *** p=1 simple mean                                           ;
        *** p=2 mixed model predictor                                 ;
        *** p=3 scott and smith predictor                             ;
        *** p=4 rp model predictor                                    ;
        *** p=5 rp model predictor with response error                ;
        *** p=6 mixed model predictor with estimated shrinkage        ;
        *** p=7 scott and smith predictor with estim. shrinkage       ;
        *** p=8 rp model predictor with estim. shrinkage              ;
        *** p=9 rp model predictor with response error with estim     ;
        ***     shrinkage constant                                    ;
        **************************************************************;
        *** Compute various values to evaluate simple mean as predictor *;
        **************************************************************;
        i=ix;
        s=slabel{ix};
        mu_s=mus{ix};
        a{1}=0 ;                               b{1}=1;                          *Sample mean;
        a{2}=1-ki ;                            b{2}=ki;                         *Mixed model;
        a{3}=(1-f)*(1-ki);                     b{3}=f+(1-f)*ki;                 *Scott+Smith;
        a{4}=(1-f)*(1-k);                      b{4}=f+(1-f)*k;                  *RP Model ;
        a{5}=f*(1-krstar)+(1-f)*(1-kstar);     b{5}=f*krstar+(1-f)*kstar;       *RP with Resp Error Model;
        a{6}=1-ki_e ;                          b{6}=ki_e;                       *Mixed model;
        a{7}=(1-f)*(1-ki_e);                   b{7}=f+(1-f)*ki_e;               *Scott+Smith;
        a{8}=(1-f)*(1-k_e);                    b{8}=f+(1-f)*k_e;                *RP Model ;
        a{9}=f*(1-krstar_e)+(1-f)*(1-kstar_e); b{9}=f*krstar_e+(1-f)*kstar_e;   *RP with Resp Error Model;
        yib=yibx{ix};                          *Simple cluster mean;
        DO e=1 to 9;
          p{e}=a{e}*ybb+b{e}*yib;              * Predictor of cluster mean;
          smse{e}=(p{e}-mu_s)**2;              * Squared deviation of predictor;
          sqd{e}=sqd{e}+smse{e};               * Sum of Squared deviation of predictor;
        END;
        LABEL p1="cluster*mean:*p1"
              p2="mixed*model*blup:*p2"
              p3="scott*+smith*blup:*p3"
              p4="rp*model*pred:*p4"
              p5="rp*resperr*model*pred:*p5"
              p6="est*mixed*model*blup:*p6"
              p7="est*scott*+smith*blup:*p7"
              p8="est*rp*model*pred:*p8"
              p9="est*rp*resperr*model*pred:*p9";
        *OUTPUT e1;
      END;

      *****************************************************;
      *** Compute average MSE from simulation and         *;
      *** theoretical MSE                                 *;
      *****************************************************;
      /* Kept inside the LAST.trial group: lastrec implies LAST.trial, and
         a, b and vstar from the loop above are still set on this record. */
      IF lastrec=1 THEN DO;
        DO e=1 to 9;
          sqd{e}=sqd{e}/(&trialn*&ns);   *Calculate average MSE from simulation;
          IF e le 5 THEN
            tmse{e}=((a{e}+b{e})**2) * ((v_e+vbb)/(&ns*&ms) + vstar/&ns - v_mus/&bn)
                    +2*(a{e}+b{e})*v_mus/&bn
                    + -2*((a{e}+&ns*b{e})/&ns) * (v_e/&bm + vstar)
                    + (b{e}**2) * ((&ns-1)/&ns) * ((v_e+vbb)/&ms + vstar)
                    + (vstar-v_mus/&bn) + v_e/&bm ;
        END;
        OUTPUT e2;
      END;
    END;
  RUN;

  *************************************************;
  *** Construct summary estimated MSE             *;
  *************************************************;
  PROC FORMAT;
    VALUE typf 0="Unkn k est"
               1="Estimates"
               2="Theoretical"
               3="Difference";
  RUN;

  /* Four output rows per e2 record: typ 0-3, labelled by format typf. */
  DATA e3 (KEEP=typ ntot stat1-stat5);
    SET e2;
    ARRAY stat{5};
    ARRAY tmse{5};
    ARRAY sqd{9};
    ntot=&trialn*&ns;
    DO e=1 to 5;
      IF e=1 THEN stat{e}=sqd{e};
      IF e GT 1 THEN stat{e}=sqd{e+4};   *Use estimated shrinkage constants;
    END;
    typ=0;
    OUTPUT;
    DO e=1 to 5;
      stat{e}=sqd{e};
    END;
    typ=1;
    OUTPUT;
    DO e=1 to 5;
      stat{e}=tmse{e};
    END;
    typ=2;
    OUTPUT;
    DO e=1 to 5;
      stat{e}=sqd{e}-tmse{e};
    END;
    typ=3;
    OUTPUT;
    LABEL stat1="mse*cluster*mean:*stat1"
          stat2="mse*mm BLUP:*STAT2"
          stat3="mse*ss BLUP:*STAT3"
          stat4="mse*rp BLUP:*STAT4"
          stat5="mse*rp+re BLUP:*STAT5"
          ntot="total # of*simulated*clusters:*ntot"
          typ="statistic*parameter*difference:*typ";
  RUN;

  ***************************************************;
  *** Print main results                          ***;
  ***************************************************;
  PROC PRINT DATA=p0 (OBS=1) SPLIT="*" NOOBS;
    VAR bn bm mu_p v_mus v_e ;
    FORMAT mu_p v_mus v_e 7.4;
    TITLE2 "Table Summary Population Parameters";
  RUN;

  /* Exactly one of the next two listings prints, depending on &vbb. */
  PROC PRINT DATA=e3 SPLIT="*" NOOBS;
    WHERE &vbb=0 ;
    VAR ntot typ stat1-stat4;
    FORMAT typ typf. stat1-stat4 f9.7;
    TITLE2 "Table Summary of MSE results: N=&bn (n=&ns) M=&bm (m=&ms) Pop-Mean=&mu_p Resp.Error-Var=&vbb ";
  RUN;

  PROC PRINT DATA=e3 SPLIT="*" NOOBS;
    WHERE &vbb NE 0 ;
    VAR ntot typ stat1-stat3 stat5;
    FORMAT typ typf. stat1-stat5 f9.7;
    TITLE2 "Table Summary of MSE results: N=&bn (n=&ns) M=&bm (m=&ms) Pop-Mean=&mu_p Resp.Error-Var=&vbb ";
  RUN;

  ***************************************************;
  *** Optional output for predictors of cluster means;
  ***************************************************;
  PROC PRINT DATA=e1 (OBS=25) SPLIT="*";
    WHERE oute=1;
    VAR trial i s mu_s p1-p5;
    FORMAT mu_s p1-p5 f7.4;
    TITLE2 "Table Predictors of Cluster Means";
  RUN;

%MEND simc;

%simc(1000, /*# Clusters in population: N */
      60,   /*# Units in a cluster: M */
      50,   /*Population mean mu */
      8,    /*Var Between Cluster means */
      400,  /*Var Between Units in clusters*/
      500,  /*# of trials */
      30,   /*# Clusters in sample : n */
      4,    /*# Number of Units per cluster in sample: m*/
      120,  /*# Number of units in sample=n*m */
      0,    /*Response error on cluster-unit*/
      1,    /*# samples per selected cluster. Set=1 unless you want to check the permutations*/
      0,    /*Value to control printed output for population: 0=none, 1=limited, 2=a lot*/
      0,    /*Value to control printed cluster selection output: 0=none, 1=limited, 2=a lot*/
      0,    /*Value to control printed unit selection output: 0=none, 1=limited, 2=a lot*/
      0     /*Value to control printed output for development of predictors: 0=none 1=limited, 2=a lot*/
      );

Fall 2015 Solutions. Biostats691F: Practical Data Management and Statistical Computing

Fall 2015 Solutions. Biostats691F: Practical Data Management and Statistical Computing. Assignment 8: Creating a Preliminary Data Report - The Fetal Lung Maturity Study. Data for the study were available…