A SAS macro for fitting a logistic regression on repeated random subsets of the data
Source:    Publish Time: 2013-03-24 03:47   3611 Views   Size:  16px  14px  12px
Author: Xuanqian Xie This analysis was conducted for Zheng et al 2013. We selected numerous subsets of entire d

Author: Xuanqian Xie


This analysis was conducted for Zheng et al 2013.

We selected numerous random subsets of the entire data set and tested whether the results from the subsets were similar to those from the full data.

 

Reference:

Zheng J, Alsaadi T, Blaichman J, Xie X, Omeroglu A, Meterissian S, Mesurolle B. Invasive ductal carcinoma of the breast: correlation between tumor grade determined by ultrasound-guided core biopsy and surgical pathology. AJR Am J Roentgenol. 2013; 200(1):W71-4  Link: http://www.ncbi.nlm.nih.gov/pubmed/23255773

 

SAS code:

/*
 * Cross-validation by repeated sub-sampling: seed table.
 *
 * PRIME holds one numeric column, PN, with one observation per value
 * listed below.  Macro AA reads this table and passes each PN value to
 * PROC SURVEYSELECT as the SEED= for one random subset, so the number
 * of values here equals the number of subsets that are drawn.
 *
 * The trailing @@ holds the input line so that several values can be
 * read from each data line.
 */
data prime;
    input pn @@;
datalines;
997583  997933  998287  998687
997589  997949  998311  998689
997597  997961  998329  998717
997609  997963  998353  998737
997627  997973  998377  998743
997637  997991  998381  998749
997649  998009  998399  998759
997651  998017  998411  998779
997663  998027  998419  998813
997681  998029  998423  998819
997693  998069  998429  998831
997699  998071  998443  998839
997727  998077  998471  998843
997739  998083  998497  998857
997741  998111  998513  998861
997751  998117  998527  998897
997769  998147  998537  998909
997783  998161  998539  998917
997793  998167  998551  998927
997807  998197  998561  998941
997811  998201  998617  998947
997813  998213  998623  998951
997877  998219  998629  998957
997879  998237  998633  998969
997889  998243  998651  998983
997891  998273  998653  998989
997897  998281  998681  999007
;

 

/*
 * Macro AA: refit the stepwise logistic model on many random subsets
 * and summarise how stable the selected effects and odds ratios are.
 *
 * Parameters (all keyword, all optional; the defaults reproduce the
 * original hard-coded behaviour, so a bare call with no arguments
 * still works):
 *   data=      input data set that is sub-sampled (default bio_grade_f1)
 *   sampsize=  observations drawn per subset      (default 180)
 *   seeds=     data set with numeric column PN; one subset is drawn
 *              per observation, using PN as the SURVEYSELECT seed
 *              (default prime)
 *
 * Data sets created:
 *   pp<i>, Pa<i>, OR<i>, Or_<i>   per-subset sample and ODS tables
 *   Pa, Or                        all subsets stacked
 *   Pa_sum, aa_Pa                 parameter-estimate summaries
 *   OR_new, or_new1, aa_OR        odds-ratio summaries
 */
%macro aa(data=bio_grade_f1, sampsize=180, seeds=prime);

    /* Keep working macro variables out of the caller scope. */
    %local vars n i var;

    %let vars=;
    %let n=0;

    /* Build one $-delimited list of seeds and count them in a single
       pass.  The list is limited to 32767 characters. */
    data _null_;
        length seed_list $32767;
        retain seed_list;
        set &seeds end=last_obs;
        seed_list = catx('$', seed_list, pn);
        if last_obs then do;
            call symputx('vars', seed_list, 'L');
            call symputx('n', _n_, 'L');
        end;
    run;

    /* Show the seed list and the number of subsets in the log. */
    %put **Macro_vars= &vars**;
    %put ** n = &n **;

    /* With no seeds the stacking steps below would have nothing to read. */
    %if &n = 0 %then %do;
        %put ERROR: seed data set &seeds has no observations, nothing to do.;
        %return;
    %end;

    %do i = 1 %to &n;

        %let var = %scan(&vars, &i, $);
        %put &n &var;

        /* Draw subset number i by simple random sampling. */
        proc surveyselect data=&data method=srs n=&sampsize seed=&var out=pp&i;
        run;

        /* Fit the model quietly and capture estimates and odds ratios. */
        ods listing close;
        ods output ParameterEstimates=Pa&i OddsRatios=OR&i;

        proc logistic data=pp&i descending;
            where Echo ^=1 and Margins ^=1;
            class size_f (param=ref ref= "Medium") grade (param=ref ref= "1")
                  echo (param=ref ref="4") Color (param=ref ref= "0")
                  Shape (param=ref ref= "1") Margins (param=ref ref= "2")
                  Orient (param=ref ref= "1") Acoustic (param=ref ref= "2")
                  Bound (param=ref ref= "1");
            model correct= grade size_f Echo age color Shape Margins Orient Acoustic Bound
                / scale=none selection=stepwise slstay=0.10 slentry=0.2 include=3
                  AGGREGATE lackfit;
        run;

        ods listing;

        /* Normalise the effect label to its first four words joined by
           single blanks, so labels can be grouped across subsets. */
        data Or_&i;
            set or&i;
            length effect1 $25.;
            effect1=trim(left(scan(effect, 1)))||" "||trim(left(scan(effect, 2)))
                 ||" "||trim(left(scan(effect, 3)))||" "||trim(left(scan(effect, 4)));
        run;

    %end;

    /* Stack the per-subset parameter estimates. */
    data Pa;
        set
        %do i = 1 %to &n;
            Pa&i
        %end;
        ;
    run;

    /* Stack the per-subset odds ratios. */
    data Or;
        set
        %do i = 1 %to &n;
            or_&i
        %end;
        ;
    run;

    /* Per variable and class level: how many subset models contain it,
       and the mean of each reported statistic. */
    proc sql;
        create table Pa_sum as
        select Variable, ClassVal0 as class, count(*) as count,
               mean(Estimate) as Estimate, mean(StdErr) as StdErr,
               mean(WaldChiSq) as WaldChiSq, mean(ProbChiSq) as ProbChiSq
          from pa
         group by Variable, ClassVal0;
    quit;

    /* Per variable: largest level count and its share of the subsets. */
    proc sql;
        create table aa_Pa as
        select Variable, max(count) as n,
               (calculated n / &n) as Pct format=percent9.0
          from pa_sum
         group by variable
         order by n descending, variable;
    quit;

    /* Per effect label: frequency, share of subsets, mean and SD of
       the odds ratio, and mean confidence limits. */
    proc sql;
        create table OR_new as
        select Effect1, count(*) as count,
               (calculated count / &n) as Pct format=percent9.0,
               mean(OddsRatioEst) as OR, std(OddsRatioEst) as std_OR,
               mean(LowerCL) as L_CL, mean(UpperCL) as H_CL
          from or
         group by Effect1
         order by count descending, Effect1;
    quit;

    /* Distribution of the odds ratio per effect label.  The overall
       row (_type_ 0) is excluded. */
    proc means data=or;
        class effect1;
        var OddsRatioEst;
        output out=or_new1 (where=(_type_ ne 0))
               mean= std= p50= min= p5= p25= p75= p95= max=
               / autolabel autoname;
    run;

    /* Combine both odds-ratio summaries into the final table. */
    proc sql;
        create table aa_OR as
        select a.Effect1, count, Pct format=percent9.0, a.OR as mean_OR,
               OddsRatioEst_P50 as median_OR, std_OR, L_CL, H_CL,
               OddsRatioEst_Min as min_OR, OddsRatioEst_P5 as P5_OR,
               OddsRatioEst_P25 as P25_OR, OddsRatioEst_P75 as P75_OR,
               OddsRatioEst_P95 as P95_OR, OddsRatioEst_Max as max_OR
          from or_new a, or_new1 b
         where a.effect1=b.effect1
         order by count descending, Effect1;
    quit;

%mend aa;

%aa;

 

 

/*
 * Compare the subset results with the whole-data results: step 1.
 *
 * Copies Aa_or_sel to Aa_or_sel1 and adds EFFECT1, the first four words
 * of EFFECT joined by single blanks (length 25).  This is the same
 * label that macro AA builds for its per-subset odds-ratio tables, so
 * the two sources can be joined on EFFECT1.
 *
 * NOTE(review): Aa_or_sel is not created in this file.  It is presumably
 * the OddsRatios output of the model fitted on the full data; confirm.
 */
data Aa_or_sel1;
    set Aa_or_sel;
    length effect1 $25.;
    effect1=trim(left(scan(effect, 1)))||" "||trim(left(scan(effect, 2)))
         ||" "||trim(left(scan(effect, 3)))||" "||trim(left(scan(effect, 4)));
run;

 

/*
 * Compare the subset results with the whole-data results: step 2.
 *
 * Joins Aa_or_sel1 (a) to aa_OR (b) on EFFECT1 and reports, per effect:
 *   model_or     odds ratio taken from Aa_or_sel1
 *   mean_OR      mean odds ratio taken from aa_OR
 *   median_OR    median odds ratio taken from aa_OR
 *   diff_median  absolute relative difference of median_OR from model_or
 *   diff_mean    absolute relative difference of mean_OR from model_or
 *
 * The differences are computed from a.OddsRatioEst directly: a column
 * alias defined in the same SELECT list (model_or) cannot be referenced
 * there without the CALCULATED keyword.
 */
proc sql;
    create table aa_OR_com as
    select a.Effect1,
           a.OddsRatioEst as model_or,
           b.mean_OR,
           b.median_OR,
           abs((b.median_OR - a.OddsRatioEst) / a.OddsRatioEst)
               as diff_median format=percent9.0,
           abs((b.mean_OR - a.OddsRatioEst) / a.OddsRatioEst)
               as diff_mean format=percent9.0
      from Aa_or_sel1 a, aa_or b
     where a.effect1=b.effect1
     order by Effect1;
quit;