SAS Macro for conditional logistic regression
Source:    Publish Time: 2013-01-06 06:13   3717 Views   Size:  16px  14px  12px
Author: Xuanqian Xie  I wrote this macro primarily for one of my friends, who did many observational studie

Author: Xuanqian Xie 


I wrote this macro primarily for one of my friends, who has done many observational studies using administrative databases. He helped me a lot with the methodology of observational studies and shared his research experience with me. He suggested that I develop this macro for nested case-control studies. He also provided advice and tested the macro. This macro may be used and modified for research purposes, but it comes without any warranty.

This macro was developed in SAS 9.1.3 on Windows 2000 and was tested in SAS 9.1.3 on Windows XP. I have not had the chance to test it with newer versions of SAS (9.2, 9.22 and 9.3) or newer systems (Windows 7 and Windows 8).

 

****************************************************************************************

*Purpose: For conditional logistic regression in case-control studies.                 *

*Author: Xuanqian (Shawn) Xie                                                          *

*Date of last update: Nov. 7th, 2008                                                   *

*Program:  Part 1  Data checking.                                                      *

*          Part 2  Descriptive statistics.                                             *

*          Part 3  Crude OR, without interaction between exposure and covariate.       *

*          Part 4  Adjusted OR, including all covariates and all exposures.            *

*          Part 5  Produce WORD file for output, baseline and crude/adjusted OR.       *

*SAS version and environment: This program was created by SAS 9.1.3 version in Win2000 *

*                               system. The compatibility has not yet been tested.     *

*                                                                                      *

****************************************************************************************;

 

****************************************************************************************

*Note: This macro only works with a specific data structure. Before using it, please   *
*         understand the requirement for each macro variable. Variable names should be *
*         no longer than 16 characters, otherwise they will be truncated in the output.*
*         Please do not leave any important datasets in the work library, because this *
*         program overwrites datasets there, and its final step (currently commented   *
*         out) deletes every dataset in the work library. This program only accepts    *
*         dichotomous (0/1) or continuous variables, so a categorical variable with    *
*         more than 2 values must be recoded as dummy variables before running this    *
*         program.                                                                     *

*                                                                                      *

*Variables: data =The name of dataset for analysis.                                    *

*           setid=Each pair of case and control has a set number. One pair/set may     *

*                   have, 1 case and several controls.                                 *

*           case =Numeric variable, 0(control) or 1(case).                             *

*           expo=Exposure information, numeric variable, 0 (unexposure) or 1(exposure).*

*                   More than one exposure variable may be given. Separate the names   *
*                   of the exposure variables with a blank.                            *

*           cova = Covariate information, numeric variable, 0 (no) or 1(yes),          *
*                  or continuous variables. Separate the names with a blank.           *

*           others= Other baseline information, like age, sex and other important      *

*                    covariates.                                                       *

*           crude_class= If the crude OR model include all exposure variables          *

*                        (normally mutually exclusive variables), the value of this    *

*                        macro variable is "yes". Otherwise, just neglect it.          *

*           out_file= The path and file name of output.                                *

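****************************************************************************************;

/***************************************************************************************
*An example of parameter inputs. It is kept in a block comment because the macro       *
*processor still executes macro calls that appear inside an asterisk-style comment.    *
*                                                                                      *
*   %Con_Log (data=Example, setid=caseid, case=evt2,                                   *
*        expo=c_nsaid  c_nap  c_cel  c_rof  c_mel   past_any, crude_class=yes,         *
*        cova=ht_all dm_all lip_all chf_all cad_ldasa cvd_all pvd_all gi_all resp_all, *
*        others=age_index  sex1, out_file=C:\Documents and Settings\xie\Desktop\OUT);  *
****************************************************************************************/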

 

 

/* Session options.
   Listing layout: no date, no page number, page counter restarted at 1,
   200 lines by 100 columns, and a dash used as the page delimiter.
   Macro debugging: MPRINT, MLOGIC and SYMBOLGEN are all switched on. */
options nodate nonumber pageno=1 pagesize=200 linesize=100 formdlim='-'
        mprint mlogic symbolgen;

 

 

%macro Con_Log (data=, setid=, case=, expo=, cova=, others=, crude_class=, out_file=);

/*
   Con_Log: conditional logistic regression report for matched case-control data.

   Parameters
     data        = input dataset.
     setid       = variable identifying the matched set (used as the STRATA variable).
     case        = numeric 0 (control) / 1 (case) indicator.
     expo        = blank-separated list of exposure variables.
     cova        = blank-separated list of covariates.
     others      = blank-separated list of further baseline variables.
     crude_class = YES to refit the crude model with all exposures together.
     out_file    = path and file name of the RTF report, without extension.
*/

/*Part 1: Data checking*/

/* Stop immediately when a mandatory parameter is missing. The messages are written
   without quotes so that the log recognises the ERROR: prefix. */
%if %length (&data)=0 %then %do;
    %put ERROR: NO DATASET;
    %return;
%end;

%if %length (&setid)=0 %then %do;
    %put ERROR: NO SET ID;
    %return;
%end;

/* Working macro variables. COV (previously misspelled VOV), the second loop index J
   and the per-variable names VAR and VAR_CO are kept local so that same-named
   variables of the calling session are not overwritten. */
%local exp cov other i j n d n_ca n_co var var_co;

%let exp=&expo;
%let cov=&cova;
%let other=&others;

 

/*checking missing data*/

/*
   MISSING: print, for every variable of a dataset, how many values are missing.
     data = dataset to check.
   Numeric values are grouped with format MISSF and character values with format
   $MISSF. The numeric range runs from ._ to .Z so that special missing values are
   reported as Missing too, not only the ordinary missing value.
*/
%macro missing (data=);

  proc format;
     value missf   ._ - .z = 'Missing'
                   other   = 'Non-Missing';
     value $missf  ' '     = 'Missing'
                   other   = 'Non-Missing';
  run;

  proc freq data = &data;
      tables _all_ / missing;
      format _character_ $missf. _numeric_ missf.;
      title "Missing Checking of &data";
  run;

  /* clear the title so that it does not leak into later output */
  title;

%mend missing;

 

 

 

/* Working copy AA: only the variables named in the macro parameters are kept. */
data aa (keep=&setid &case &exp &cov &other);
    set &data;
run;

/* Missing-value report for the working copy. */
%missing (data=aa);

/*Part 2: Descriptive statistics*/
/*Part 3: Crude OR, without interaction between exposure and covariate*/

/* SS: column metadata of WORK.AA taken from the dictionary tables. */
proc sql noprint;
   create table ss as
      select *
      from dictionary.columns
      where LIBNAME="WORK" and MEMNAME="AA";
quit;

/* ZZ1: one row per variable name found in the exposure, covariate and
   other-variable lists (at most 100 names are read). */
data zz1;
   length zz $1000;
   zz = "&exp " || "&cov " || "&other";
   do m = 1 to 100;
      call scan(zz, m, position, length);
      /* CALL SCAN reports position 0 when there is no further word */
      if position <= 0 then leave;
      Name = substrn(zz, position, length);
      output;
   end;
run;

 

/* Build VARS, a list of all variable names in ZZ1, each name preceded by a dollar
   sign. CALL EXECUTE runs the LET statement once per row. */
%let vars=;
data _null_;
  set zz1;
  call symputx('mac',name);
  call execute('%let vars=&vars$&mac;');
run;

/*List all variables in descriptive statistics*/
%put **Macro_vars= &vars**;

/* N = number of names in the list (one dollar sign per name). */
%let n=%eval(%sysfunc(countc(&vars,"$"))+0);

/* Keep the per-variable helpers local to the macro. */
%local var No_dist;

/* BB is AA sorted by the case indicator. It does not depend on the loop variable,
   so it is sorted once here and not once per variable. */
proc sort data=aa out=bb;
    by &case;
run;

/* For every variable, count its distinct values and store the result in a
   one-row dataset S1, S2, ... (columns VARIABLE and NO_DIST). */
%do i= 1 %to &n;
    %let var=%scan(&vars, &i, $);
    %put &var;

    proc sql;
       select count (distinct &var)
       into: No_dist
       from bb;
    quit;

    data s&i ;
        variable= "&var";
        No_dist =&No_dist;
    run;
%end;

 

/*
   COMBINE: stack the numbered datasets NAME1 ... NAMEnn into one dataset.
     var_len = character variable that is given a common length of 16 first, so
               that values of different lengths are not truncated when stacking.
     out     = name of the combined dataset.
     nn      = number of input datasets.
     name    = common prefix of the input datasets.
   When NN is below 1 an empty dataset holding only VAR_LEN is created. A SET
   statement without any dataset name would silently read the most recently
   created dataset.
*/
%macro combine (var_len=, out=, nn=, name=);
    %local d;

    data &out;
        length &var_len $16;
    %if &nn >= 1 %then %do;
        set
        %do d=1 %to &nn;
            &name&d
        %end;
        ;
    %end;
    %else %do;
        stop;
    %end;
    run;
%mend combine;

 

/* SSSS: one row per analysed variable with its number of distinct values. */
%combine (var_len=variable, out=ssss,  nn=&n,  name=s);

/* Variables with at most 4 distinct values are handled as categorical (CATE),
   all others as continuous (CONT). */
data cate cont;
    set ssss;
    if No_dist <= 4 then output cate;
    else output cont;
run;

 

/*Part 2.1 : Descriptive statistics of Category variables*/

/* VARS_CATE: names of the categorical variables, each preceded by a dollar sign. */
%let vars_cate=;
data _null_;
  set cate;
  call symputx('mac',variable);
  call execute('%let vars_cate=&vars_cate$&mac;');
run;

/*List Category variables (value 0 or 1)in descriptive statistics*/
%put **Macro vars_cate= &vars_cate**;

%let n_ca=%eval(%sysfunc(countc(&vars_cate,"$"))+0);

/* BB (AA sorted by the case indicator) and CC (BB plus the artificial TIME used by
   PHREG: 1 for cases, 2 for controls) do not depend on the loop variable. They are
   built once here, which also guarantees that CC exists for the later models even
   when there is no categorical variable. */
proc sort data=aa out=bb;
    by &case;
run;

data cc;
    set bb;
    time=2-&case;
run;

%do i= 1 %to &n_ca;
    %let var=%scan(&vars_cate, &i, $);
    %put &var;

    /* Z<i>: frequency and within-group percentage of the variable by case status. */
    proc freq data=bb;
        by &case;
        tables &var /out=z&i;
    run;

    /* YY<i>: keep the non-zero level only and format it as count(percent).
       Rows for a missing level are dropped as well. */
    data yy&i;
        length N_per $24. ;
        set z&i;
        if missing(&var) or &var=0 then delete;
        if &case=0 then group=1;
        if &case=1 then group=2;
        variable="&var";
        N_per=trim(left(put(COUNT, 8.0)))||"("||trim(left(put(PERCENT, 8.1)))||")";
        drop &case &var;
    run;

    /* Y<i>: one row per variable with the control and the case column side by side.
       Each column is filled from the row of its own group, so a group without any
       row leaves its column blank and does not shift the other column or lose the
       variable. */
    data y&i;
        length variable $16. per_control $24. per_case $24.;
        retain per_control per_case;
        set yy&i end=last_row;
        if group=1 then per_control=N_per;
        else if group=2 then per_case=N_per;
        if last_row then output;
        keep variable per_control per_case;
    run;

    /*3.1 Crude OR, without interaction*/
    /* L<i>: parameter estimates of the single-variable conditional model. */
    ods output ParameterEstimates=l&i;

    proc phreg nosummary data=cc;
        title "crude OR: &var";
        model time*&case(0) = &var / ties=discrete rl ;
        strata &setid;
    run;

    ods show;
    title;
%end;

/* BASE1: baseline table of the categorical variables. CRUDE1: their crude estimates. */
%combine (var_len=variable, out=base1,  nn=&n_ca,  name=y);

%combine (var_len=variable, out=crude1,  nn=&n_ca,  name=l);

 

/*2.3 crude class of exposure*/

/* When CRUDE_CLASS is YES, all exposure variables are fitted together in one
   conditional model and the single-variable crude estimates of those variables
   in CRUDE1 are replaced by the estimates of this joint model. */
%if %upcase(&crude_class)=YES  %then %do;

    ods output ParameterEstimates=crude_class;

    proc phreg nosummary data=cc;
        title "crude_class OR";
        model time*&case(0) =  &exp / ties=discrete rl ;
        /* stratify on the matched-set variable passed by the caller;
           the name caseid was hard-coded here before */
        strata &setid;
    run;

    ods show;
    title;

    proc sql;
        update crude1 a
        set HazardRatio=(select b.HazardRatio from crude_class b where a.variable=b.variable),
            HRLowerCL=(select b.HRLowerCL from crude_class b where a.variable=b.variable),
            HRUpperCL=(select b.HRUpperCL from crude_class b where a.variable=b.variable),
            ProbChiSq=(select b.ProbChiSq from crude_class b where a.variable=b.variable)
        where variable in (select variable from crude_class);
    quit;

%end;

 

 

/*Part 2.2 : Descriptive statistics of Continuous variables*/

/* Loop index and current variable name are kept local to the macro. */
%local j var_co;

/* VARS_CONT: names of the continuous variables, each preceded by a dollar sign. */
%let vars_cont=;
data _null_;
  set cont;
  call symputx('mac',variable);
  call execute('%let vars_cont=&vars_cont$&mac;');
run;

/*List continuous variables in descriptive statistics*/
%put **Macro vars_cont= &vars_cont**;

%let n_co=%eval(%sysfunc(countc(&vars_cont,"$"))+0);

%do j= 1 %to &n_co;
    %let var_co=%scan(&vars_cont, &j, $);
    %put &var_co;

    /* Descriptive statistics*/
    /* XX<j>: summary statistics of the variable by case status. */
    proc univariate data=bb normal;
        by &case;
        var &var_co;
        output out=xx&j P5=p5  P95=p95 Mean=mean STD=std MEDIAN=P50;
    run;

    /* X<j>: mean(SD) as text. Two decimals are used when the default BEST12.
       rendering of the mean or of the SD shows a decimal point, none otherwise.
       The explicit PUT replaces the implicit numeric-to-character conversion. */
    data x&j;
        length variable $16 mean_std $20 &case 8;
        set xx&j;
        label mean ="the mean"
              std  ="the standard deviation"
              p95  ="the 95th percentile"
              p50  ="the median"
              p5   ="the 5th percentile";
        variable="&var_co";
        if index(put(mean, best12.), ".") ne 0 or index(put(std, best12.), ".") ne 0 then do;
            mean_std=trim(left(put(mean, 8.2)))||"("||trim(left(put(std, 8.2)))||")";
        end;
        else do;
            mean_std=trim(left(put(mean, 8.0)))||"("||trim(left(put(std, 8.0)))||")";
        end;
        keep variable  mean_std  &case ;
    run;

    /* T<j>: one row per variable with the control and the case column side by side.
       Each column is filled from the row of its own group and is as wide as
       MEAN_STD, so nothing is truncated and a missing group cannot shift values
       into the wrong column. */
    data t&j;
        length variable $16 mean_control $20 mean_case $20;
        retain mean_control mean_case;
        set x&j end=last_row;
        if &case=0 then mean_control=mean_std;
        else if &case=1 then mean_case=mean_std;
        if last_row then output;
        keep variable mean_control mean_case;
    run;

    /*Crude OR, without interaction*/
    /* H<j>: parameter estimates of the single-variable conditional model. */
    ods output ParameterEstimates=h&j;

    proc phreg nosummary data=cc;
        title "crude OR: &var_co";
        model time*&case(0) = &var_co / ties=discrete rl ;
        strata &setid;
    run;

    ods show;
    title;
%end;

/* BASE2: baseline table of the continuous variables. CRUDE2: their crude estimates. */
%combine (var_len=variable, out=base2,  nn=&n_co,  name=t);

%combine (var_len=variable, out=crude2,  nn=&n_co,  name=h);

 

 

 

 

/*Part 4: Adjusted OR, including all covariates and all exposures */

/* ADJUSTED: parameter estimates of the conditional model that contains every
   exposure and every covariate. */
ods output ParameterEstimates=adjusted;

proc phreg nosummary data=cc;
    title "Adjusted OR";
    model time*&case(0) =  &exp &cov/ ties=discrete rl ;
    /* stratify on the matched-set variable passed by the caller;
       the name caseid was hard-coded here before */
    strata &setid;
run;

ods show;
title;

 

 

/*Part 5: Produce WORD file for output*/

/* CRUDE: crude estimates of the categorical and the continuous variables. */
data crude;
    set crude1 crude2;
run;

/* CRUDE_OR: crude estimate as text in the form ratio(lower-upper), plus its p value.
   Each of the three numbers can take up to 8 characters, so the text variable is
   32 characters wide. A width of 16 cut off results such as 10.23(1.10-95.12).
   NOTE(review): the column VARIABLE is expected in the ODS ParameterEstimates
   output; confirm the column name on the SAS release in use. */
data crude_OR;
    length OR_crude $32 ;
    set crude;
    label OR_crude="Crude OR (95% CI)" P_crude="P value";
    OR_crude =trim(left(put(HazardRatio, 8.2)))||"("||trim(left(put(HRLowerCL, 8.2)))
        ||"-"||trim(left(put(HRUpperCL, 8.2)))||")";
    P_crude=trim(left(put(ProbChiSq, 8.4)));
    /* a p value that rounds to zero is reported as below 0.0001 */
    if P_crude="0.0000" then P_crude="<0.0001";
    keep variable OR_crude P_crude;
run;

/* ADJ_OR: the same presentation for the adjusted model. */
data adj_OR;
    length OR_adj $32 ;
    set adjusted;
    label OR_adj="Adjusted OR (95% CI)" P_adj="p value";
    OR_adj =trim(left(put(HazardRatio, 8.2)))||"("||trim(left(put(HRLowerCL, 8.2)))
        ||"-"||trim(left(put(HRUpperCL, 8.2)))||")";
    P_adj=trim(left(put(ProbChiSq, 8.4)));
    if P_adj="0.0000" then P_adj="<0.0001";
    keep variable OR_adj P_adj;
run;

 

/* Write the three report tables to an RTF file only: the results window and the
   listing destination are switched off while the report is produced. */
ods results off;
ods listing close;
ods rtf file ="&out_file..rtf"  NOTOC_DATA ;

/* Table 1: categorical baseline characteristics. */
title "Baseline characteristic of category variables" ;
proc print data=base1 noobs label;
    label per_control="Control N(%)" per_case="Case N(%)";
run;

/* Table 2: continuous baseline characteristics. */
title "Baseline characteristic of continuous variables" ;
proc print data=base2 noobs label;
    label mean_control="Control Mean(SD)" mean_case= "Case Mean(SD)";
run;

/* Table 3: crude and adjusted estimates for the variables present in both tables. */
title "Crude and adjusted OR/HR of exposures and covariates" ;
footnote1 "The end of output" ;
footnote2 "&systime  &sysdate9";
proc sql;
    select a.variable, OR_crude,  OR_adj
    from adj_or a, crude_or  b
    where a.variable=b.variable;
quit;

/* Reset titles and footnotes, close the RTF file and restore the destinations. */
title;
footnote;

ods rtf close;
ods listing;
ods results on;

/* Disabled clean-up step, kept for reference. When enabled it deletes every
   dataset in the work library:

proc datasets lib=work kill;
run;
quit;
*/

%mend Con_Log;