/* ------------------------------------------------------------------
   Section 5. Power Calculations for Proportions
     1. Test one proportion compared to a null value
     2. Test proportions from two groups
     3. Power calculations for surveys
   (Headings are written as block comments so that SAS does not try
    to parse them as statements.)
   ------------------------------------------------------------------ */

/* 1. Exact test of one proportion compared to a null value */
* Tables 5.2.__ in Cohen, pages 152-163;
* NTotal = n column;
* alpha = a2 column;
* g = p2 - null_p -- list values on PROPORTION =;
* g = .05 indicates .55 : .45 split;

/* POWER = . marks power as the quantity to be solved for; one result
   is produced for each value listed on PROPORTION =. */
PROC POWER;
   ONESAMPLEFREQ
      SIDES          = 2
      ALPHA          = .119
      NULLPROPORTION = .5
      PROPORTION     = .55 .6 .65 .7
      NTOTAL         = 50
      POWER          = . ;
RUN;

* *************************;
/* 2. Test Proportions from two groups */
* sample size needed to determine specified difference between two
  proportions collected from 2 independent samples ;

/* NPERGROUP = . marks the per-group sample size as the unknown. */
PROC POWER;
   TWOSAMPLEFREQ
      TEST             = lrchi   /* fisher lrchi pchi */
      SIDES            = 2
      ALPHA            = .05
      GROUPPROPORTIONS = (.45 .25)
      NPERGROUP        = .
      POWER            = .80 ;
RUN;
* can also enter NTotal = . for combined group total;

/* The following DATA step applies the formula from Fleiss,
   "Statistical Methods for Rates and Proportions", 3rd ed., p. 72.
   (Kept as a block comment: a star-style comment statement must not
    contain an unmatched quotation mark.)

   One row is written for each p1 in .4, .5, .6 with p2 fixed at .70:
     nn  = sample size from the uncorrected formula
     npg = nn with the continuity correction applied, plus .5,
           rounded to the nearest integer                          */
DATA prop_z;
   alpha = .01;
   power = .95;
   DO p1 = .4 to .6 by .1;
      q1 = 1 - p1;
      p2 = .70;
      q2 = 1 - p2;
      pb = (p1 + p2) / 2;           /* pooled (average) proportion */
      qb = 1 - pb;
      z_alpha = PROBIT((1 - (alpha/2)));   /* two-sided critical value */
      z_beta  = PROBIT(power);
      nn = ((z_alpha*sqrt(2*pb*qb)) + (z_beta*(sqrt((p1*q1) + (p2*q2)))))**2
           / ((p2-p1)**2);
      * incorporate a recommended continuity correction ;
      npg = ROUND((nn/4) * (1 + SQRT(1 + (4/(nn*ABS(p2-p1)))))**2 + .5, 1);
      output;
   END;
run;

proc print data=prop_z noOBS n;
run;

PROC POWER;
   TWOSAMPLEFREQ
      TEST             = LRchi   /* lrchi pchi fisher */
      SIDES            = 2
      ALPHA            = .05
      GROUPPROPORTIONS = (.2 .25)
      NPERGROUP        = .
      POWER            = .80 ;
RUN;

* another way to produce same result is to specify the reference level
  for the proportion and enter what difference interests you;
PROC POWER;
   TWOSAMPLEFREQ
      TEST           = Lrchi   /* Pchi LRchi fisher */
      SIDES          = 2
      ALPHA          = .05
      REFPROPORTION  = .20
      PROPORTIONdiff = .05
      NPERGROUP      = .       /*574*/
      POWER          = .80 ;
RUN;

* ***************************** ;
* 3.
Power Calculations for Surveys;

/* Test One Proportion to a specified value

   You plan to select a SRS sample from a finite population
   (Npo=4310) and want to check the sample (via study archives)
   whether a certain data quality criterion is met (yes/no).
   It is known that 60% (p=.60) is the population "yes" rate.
   If you want an approximate 95% Confidence Interval for the
   quality variable with a given width, say +/- 5, 2, or 1%,
   how many samples do you need?
*/

/* One input row per half-width d. nn is only the starting value
   handed to the solver; p, q and Npo are the same on every row. */
DATA smp;
   * starting values;
   nn  = 200;
   p   = .6;
   q   = 1 - p;
   Npo = 4310;
   DO d = .05, .02, .01;
      OUTPUT;
   END;

proc print noOBS;
run;

/* For every row of smp, find the nn at which
   2 * SE(p), including the (Npo-nn)/Npo correction, equals d. */
PROC MODEL DATA=smp;
   2*SQRT( (p*q/(nn-1)) * ((Npo-nn)/Npo) ) - d = 0;
   SOLVE nn / out=roots_a(drop= _TYPE_ _MODE_);
run;
quit;

/* Add the sampling fraction implied by each solved nn. */
DATA roots_a;
   SET roots_a;
   sample_rate = nn / Npo;

proc print data=roots_a NOobs;
   VAR p q d nn Npo sample_rate;
   format nn 12.1;
run;

**************************************;
* Compare Two Proportions;
* sample size needed to detect difference between two proportions
  in a sample survey (assumed known population size);
*Enter the following;
* desired value of alpha and power (e.g.
alpha=.05 and power=.80) ;
* Npo = population size;
* p1 = Proportion in Group 1;
* p2 = Proportion in Group 2;

/* Three scenarios, one row each. alpha, power and the derived
   normal quantiles z_a and z_b are set once and carried onto every
   row; Npo, p1, p2 and the average proportion pb are set per row.
   NOTE(review): no starting value for nn is supplied here (unlike
   the earlier smp data set) -- confirm the solver default is
   adequate for the nonlinear solves below. */
DATA p_z;
   *DROP alpha power;
   alpha = .05;
   power = .8;
   z_a = PROBIT((1 - (alpha/2)));
   z_b = PROBIT(power);

   Npo = 1000; p1 = .55; p2 = .70; pb = (p1+p2)/2; OUTPUT;
   Npo = 1000; p1 = .50; p2 = .60; pb = (p1+p2)/2; output;
   Npo = 1000; p1 = .20; p2 = .10; pb = (p1+p2)/2; output;

PROC PRINT DATA=p_z;
RUN;

/* dld is a macro defined outside this section; presumably it deletes
   the listed data sets before they are re-created -- TODO confirm. */
%dld(roots1 roots2 roots3)

* PROC MODEL can also compute with the same formula from Fleiss shown
  above (NOTE: it does not consider population size, Npo);
* nn == the desired sample size needed to detect a meaningful
  difference between p1 and p2 (you decide);
PROC MODEL DATA=p_z;
   (((z_a*SQRT(2*pb*(1-pb)))+(z_b*(SQRT((p1*(1-p1))+(p2*(1-p2))))))**2
      / ((p2-p1)**2)) - nn = 0;
   SOLVE nn / out=roots1(drop=_: );
run;

PROC PRINT DATA=roots1 NOobs;
RUN;

* the following statements will solve for the same values of nn,
  without explicitly setting nn = f(z_a, z_b, p1, p2);
* This step computes sample size, nn, assuming an "infinite"
  population size;
PROC MODEL DATA=p_z;
   * iter=200 converge=.0001;
   ( ((ABS(p2-p1)) - (z_a*SQRT(2*(pb*(1-pb)/nn))))
     / (SQRT( ((p1*(1-p1)) + (p2*(1-p2)))/nn )) ) - z_b = 0;
   SOLVE nn / out=roots2(drop=_: );
run;
quit;

proc print data=roots2 NOobs;
run;

* assume you have Npo, can enter the fpc = ((Npo-nn)/Npo) into the
  formula;
/* The fpc scales a variance, so it belongs under the square root in
   BOTH standard-error terms. The denominator previously multiplied
   the square root by the fpc instead of placing the fpc inside it,
   which was inconsistent with the numerator. */
PROC MODEL DATA=p_z;
   * iter=200 converge=.0001;
   * computes nn including the fpc;
   ( ((ABS(p2-p1)) - (z_a*SQRT(2*(pb*(1-pb)/nn)*((Npo-nn)/Npo))))
     / (SQRT( (((p1*(1-p1)) + (p2*(1-p2)))/nn) * ((Npo-nn)/Npo) )) ) - z_b = 0;
   SOLVE nn / out=roots3(drop=_: );
run;
quit;

proc print data=roots3 NOobs;
run;