/*
Code Sheet for the Variables in the Low Birth Weight Study
Described in Section 1.6.2 Page 25-26, 2nd edition of ALR

Variable  Description                    Codes/Values           Name

   1      Identification Code            ID Number              ID
   2      Low Birth Weight               1 = BWT<=2500g,        LOW
                                         0 = BWT>2500g
   3      Age of Mother                  Years                  AGE
   4      Weight of Mother at            Pounds                 LWT
          Last Menstrual Period
   5      Race                           1 = White, 2 = Black   RACE
                                         3 = Other
   6      Smoking Status                 0 = No, 1 = Yes        SMOKE
          During Pregnancy
   7      History of Premature Labor     0 = None, 1 = One,     PTL
                                         2 = Two, etc.
   8      History of Hypertension        0 = No, 1 = Yes        HT
   9      Presence of Uterine            0 = No, 1 = Yes        UI
          Irritability
  10      Number of Physician Visits     0 = None, 1 = One      FTV
          During the First Trimester     2 = Two, etc.
  11      Birth Weight                   Grams                  BWT
*/

/* --- for SAS --- */

/*
 given the size of the raw data file, better to keep
 data separate from the program..

 so use INFILE rather than LINES

 so download the lowbwt.dat file , save it somewhere
 on the hard disk (remember the path!), then have the
 INFILE statement point to it... ie give the full path

 e.g. if you store the .dat file in sub-directory or folder
 called c:\681folder\ , path would be "c:\681folder\lowbwt.dat"

 The MISSOVER option in INFILE is an important safeguard against SAS
 going on to the next line of raw data in order to find as many
 data items as there are variables in the INPUT statement
 (e.g. if for some reason you had blank fields). With MISSOVER,
 you can limit the damage to the offending record.
*/

OPTIONS LINESIZE=85 PAGESIZE=60 ;   /* change #chars/line #lines/page */
RUN;

DATA lowbwt;   /* make a 2-part name if wish to create perm. file */
               /* rather than re-creating the dataset each time   */
               /* then next time would use library instead of DATA step */

  /* Windows-style example path (inactive); edit and activate as needed */
  *INFILE "c:\681folder\lowbwt.dat" ;

  INFILE "Macintosh HD:User:dad:courses:681:alr_2:lowbwt.dat" MISSOVER;

  INPUT ID low age lwt race smoke ptl ht ui ftv bwt ;

  /* make our OWN indicator variables for race           */
  /* and be more memorable than 'race_2 and race_3' !    */
  /* (race = 1, White, is the reference category: both   */
  /*  indicators stay 0)                                 */

  ir_black = 0;    /* I(ndicator of) R(ace)=black */
  ir_other = 0 ;   /* I(ndicator of) R(ace)=other */

  IF race = 2 THEN ir_black = 1;
  IF race = 3 THEN ir_other = 1;

  IF id ne . ;     /* skips obsn. if my Mac sees blank line */

RUN;

* -------------------------------------------------------;

TITLE CHECK the Raw data and coding;

/* PROC UNIVARIATE useful too */

PROC MEANS DATA=lowbwt;
  VAR low age lwt race ir_black ir_other smoke ptl ht ui ftv bwt ;
RUN;

/* the race*ir_black and race*ir_other cross-tabs verify the recoding */
PROC FREQ DATA=lowbwt;
  TABLES low race smoke ptl ui ftv race*ir_black race*ir_other ;
RUN;

* -------------------------------------------------------;

TITLE1 Table 2.2 FIT Logistic regression by PROC GENMOD;
TITLE2 (Generalized linear model: specify distribution & link) ;

/* DESCENDING is needed here for the same reason as in PROC LOGISTIC     */
/* below: without it the procedure models the probability of the LOWEST  */
/* ordered response level (low=0), and every coefficient comes out with  */
/* the opposite sign to Table 2.2                                        */

PROC GENMOD DATA = lowbwt DESCENDING;
  MODEL low = age lwt ir_black ir_other ftv /
        DIST=BINOMIAL LINK=LOGIT COVB CORRB ;
RUN;

* -------------------------------------------------------;

TITLE1 Table 2.2 FIT Logistic regression by PROC LOGISTIC;
TITLE2 use DESCENDING to make sure it models prob[low=1];
TITLE3 store predicted values in a new variable ;
TITLE4 Ask for variance-covariance (and correlation) matrix ;
TITLE5 of estimated coefficients: use COVB and CORRB options ;
TITLE6 check against Table 2.2 ;

PROC LOGISTIC DATA = lowbwt DESCENDING;
  MODEL low = age lwt ir_black ir_other ftv / COVB CORRB ;
  OUTPUT OUT = b PREDICTED= fitted_p;
RUN;

TITLE1 Table 2.3 FIT Logistic regression by PROC LOGISTIC;
TITLE2 .;
TITLE3 ...................LWT and race ;
TITLE4 . ;

/* NOTE: this step re-uses OUT = b, so dataset b (and fitted_p) from the */
/* Table 2.2 fit above is replaced by the predictions from this model    */

PROC LOGISTIC DATA = lowbwt DESCENDING;
  MODEL low = lwt ir_black ir_other / COVB CORRB ;
  OUTPUT OUT = b PREDICTED= fitted_p;
RUN;

* -------------------------------------------------------;

/* --- Stata section --- */

/*
 (the commands from here on are Stata commands, one per line;
  run them in Stata, not in SAS)

 given the size of the raw data file, better to keep
 data separate from the program..

 so use infile rather than input

 so download the lowbwt.dat file , save it somewhere
 on the hard disk (remember the path!), then use the
 'Set working Folder' (under Stata's File menu) to point to
 the folder where it is stored
*/

* start

clear

infile id low age lwt race smoke ptl ht ui ftv bwt using lowbwt.dat

* make our OWN indicator variables for race
* and be more memorable than 'race_2 and race_3' !
*
* ir_black   I(ndicator of) R(ace)=black
* ir_other   I(ndicator of) R(ace)=other
*
* the "if !missing(race)" qualifier leaves the indicator missing
* (rather than 0) whenever race itself is missing

gen ir_black = race == 2 if !missing(race)
gen ir_other = race == 3 if !missing(race)

* -------------------------------------------------------;

* CHECK the Raw data and coding

summarize low age lwt race ir_black ir_other smoke ptl ht ui ftv bwt

tab1 low race smoke ptl ui ftv

tab2 race ir_black , col
tab2 race ir_other , col

* Table 2.2 FIT Logistic regression by Generalized linear model
* ie specify distribution & link

glm low age lwt ir_black ir_other ftv , family(binomial) link(logit)

* ask for variance-covariance (and correlation) matrix of estimated coefficients
* vce          for covariance
* vce, corr    for correlation version

vce
vce, corr

* Logistic regression by logistic command
* ask for coefficient on logit scale
*   (logit typed on its own re-displays the fit just made by logistic)
* store predicted values in a new variable
* Ask for variance-covariance (and correlation) matrix
* of estimated coefficients
* check against Table 2.2

logistic low age lwt ir_black ir_other ftv
logit
vce
vce, corr
predict p

* Table 2.3 FIT Logistic regression by logistic
* lwt and race
*
* predicted values go into a NEW variable (p_lwt_race): predict will not
* overwrite the variable p created after the Table 2.2 fit, and would
* stop with an error if asked to create p a second time

logistic low lwt ir_black ir_other
logit
vce
vce, corr
predict p_lwt_race