setwd("/Users/jameshanley/Documents/Courses/634/Infertility") ds=read.table("trichopoulos.csv",sep=",",header=T) ; ds[1:10,] # recode >2 a's to be 2 a's ds[,4:9] = 0*(ds[,4:9]==0) + 1*(ds[,4:9]==1) + 2*(ds[,4:9]>=2) ; ds[1:10,] # "Lexis diagram showing two time-scales plot(jitter(ds$age),ds$previous.p) # reproduce Table I cases=table(ds$sa.case,ds$ia.case) ; cases ctls=table(ds$sa.ctl1,ds$ia.ctl1)+table(ds$sa.ctl2,ds$ia.ctl2) ; ctls rr = round( ( cases/ctls ) / (cases[1,1]/ctls[1,1]) , 1) ; rr # m-h # in ref category case.ref.cat = 1 * (ds$ia.case==0 & ds$sa.case==0); case.ref.cat[1:10] n.ctls.ref.cat = 1*(ds$sa.ctl1==0 & ds$ia.ctl1==0) + 1*(ds$sa.ctl2==0 & ds$ia.ctl2==0); n.ctls.ref.cat[1:10] or.mh=matrix(NA,nrow=3,ncol=3); n.informative.sets=matrix(c(NA,rep(0,8)),nrow=3,ncol=3); for (n.sa in 0:2){ for (n.ia in 0:2){ case.index.cat = 1*(ds$sa.case==n.sa & ds$ia.case==n.ia); n.ctls.index.cat = 1*(ds$sa.ctl1==n.sa & ds$ia.ctl1==n.ia) + 1*(ds$sa.ctl2==n.sa & ds$ia.ctl2==n.ia); # print(case.index.cat); # print(n.ctls.index.cat); n.cases = case.ref.cat + case.index.cat; n.ctls = n.ctls.ref.cat + n.ctls.index.cat; n = n.cases + n.ctls ; mh.num.prod = case.index.cat * n.ctls.ref.cat ; mh.den.prod = case.ref.cat * n.ctls.index.cat ; info = (mh.num.prod) > 0 | (mh.den.prod > 0) ; n.info = sum(info); if(n.sa !=0 | n.ia !=0) n.informative.sets[n.sa+1,n.ia+1] = n.info; or.mh[n.sa+1,n.ia+1] = sum(mh.num.prod[info]/n[info]) / sum(mh.den.prod[info]/n[info]); } } round(or.mh,1) n.informative.sets ###### conditional logistic regression # form a file with 1 record per subject d1=ds[,c(1,2,3,4,7)] ; names(d1)[4]="ia"; names(d1)[5]="sa" ; d1[1:5,] d2=ds[,c(1,2,3,5,8)] ; names(d2)[4]="ia"; names(d2)[5]="sa" ; d2[1:5,] d3=ds[,c(1,2,3,6,9)] ; names(d3)[4]="ia"; names(d3)[5]="sa" ; d3[1:5,] d.temp=rbind(d1,d2,d3) ; d.temp=cbind(c(1:83,1:83,1:83),c(rep(1,83),rep(0,83),rep(0,83)),d.temp) ; names(d.temp)[1]="set.no" ; names(d.temp)[2]="case" ; d.temp[c(1:7,83+1:7,2*83+1:7),] # re-arrange so 3 in same set are 1 after the other ord=c(); for (i in 1:83) ord=c(ord,i,i+83,i+2*83) ; ord[1:12] d=d.temp[ord,] ; d[1:9,] library(survival) fit.s=clogit(case~spontaneous+strata(setNumber),data=d); summary(fit.s) fit.i=clogit(case ~ as.factor(ia) +strata(set.no),data=d); summary(fit.i); # unmatched row 1 of table (0 spontaneous) fit.i.0sa.unmatched=glm(case ~ as.factor(ia), family=binomial,subset=(sa==0), data=d); exp(fit.i.0sa.unmatched$coefficients); fit.i0vs1.0sa.unmatched=glm(case ~ as.factor(ia), family=binomial, subset=(sa==0 & (ia==0 | ia==1)), data=d); exp(fit.i0vs1.0sa.unmatched$coefficients); # matched fit.i.0sa.matched=clogit(case ~ as.factor(ia)+strata(set.no),subset=(sa==0), data=d); exp(fit.i.0sa.matched$coefficients); fit.i0vs1.0sa.matched=clogit(case ~ as.factor(ia)+strata(set.no), subset=(sa==0 & (ia==0 | ia==1)), data=d); exp(fit.i0vs1.0sa.matched$coefficients); fit.i0vs1.0sa.matched.cox=coxph(Surv(age,case) ~ as.factor(ia) + strata(set.no), subset=(sa==0 & (ia==0 | ia==1)), data=d) exp(fit.i0vs1.0sa.matched.cox$coefficients); fit.s.0ia=clogit(case ~ as.factor(sa)+strata(set.no),subset=(ia==0), data=d); summary(fit.s.0ia); fit.si=clogit(case ~ as.factor(sa) +as.factor(ia)+strata(set.no),data=d); summary(fit.si) ds$time=18; summary( coxph(Surv(time,case) ~ spontaneous +induced+strata(setNumber),data=ds) ) summary( glm(case~as.factor(spontaneous),subset=(induced==0),family=binomial,data=ds) ) summary( clogit(case~as.factor(spontaneous)+strata(setNumber),subset=(induced==0),data=ds) ) summary( glm(case~as.factor(induced),subset=(spontaneous==0),family=binomial,data=ds) ) summary( clogit(case~as.factor(induced)+strata(setNumber),subset=(spontaneous==0),data=ds) ) summary( clogit(case~as.factor(spontaneous)+as.factor(induced)+strata(setNumber),data=ds) )