# set up p-t dataset for analysis of oscar data --- jh 2008.01.18 library(Epi) all.nominated=read.table("nomin8ed.csv",header=T,sep=",") # deaths and p-t sum(all.nominated$dead) sum(all.nominated$lastyr-all.nominated$nom1yr) # person time as a nominee : create last year as nominee # range=1:700 range=1:length(all.nominated$born) nom = all.nominated[range,] ; nom = nom[(nom$win1yr < 0) | (nom$win1yr > nom$nom1yr),] ; nom$as.winner=0 nom$start=nom$nom1yr nom$lastyr = nom$win1yr*(nom$win1yr>0)+nom$lastyr*(nom$win1yr<0); nom$dead = 0*(nom$win1yr>0)+nom$dead*(nom$win1yr<0); nom[1:5,] sum(nom$dead) # person time as a winner win = all.nominated[range,]; win=win[win$win1yr > 0,]; win$as.winner=1 win$start=win$win1yr ; win[1:5,] sum(win$dead) # join all = rbind( nom, win ) summary(all$lastyr - all$start) # 2 people died same year they were nominated.. all$lastyr = all$lastyr+0.5*((all$start==all$lastyr) & (all$dead==1) ) all[1:5,] summary(all$lastyr - all$start) # Define as Lexis object with timescales calendar time and age L=Lexis(entry=list(per=start),exit=list( per=lastyr, age=lastyr-born),exit.status = dead,data=all) # plot Lexis diagram plot(L) # summary summary(L$lex.dur) # split into 1 yr of age x 10 years of calendar Lexis rectangles grid delta.age=1; delta.year=10; LL <- splitLexis( L, breaks = seq(1920,2005,delta.year), time.scale="per") LL <- splitLexis( LL, breaks = seq(0,105,delta.age), time.scale="age" ) names(LL) # sylvestre/huszti/hanley (shh) .. a winner from when performer won... # redelmeier (.r) .. a winner is a winner from outset..... # ie classify PERSONS, not person-years (even retrospectively!) LL$winner.r = 1*(LL$win1yr > 0) # George Burns(514), Richard Burton, and some other dead guys (and some alive) ID=514 LL[LL$id==ID,c("id","imale","born","lastyr","per","age", "nom1yr","win1yr", "winner.r.version","as.winner","lex.dur")] # export data write.table(LL[,c(1:4,6:9,14:20)], file = "Individual-time-slices.txt", sep = " ", quote =FALSE, row.names=FALSE) # Tabulate the deaths and the person-years, overall and by rectangle# as per package # using aggregate function applied directly to LL f.shh=aggregate(LL[,c("lex.Xst","lex.dur")], # shh by=list(a.cat=floor(LL$age/5), p.cat=floor((LL$per-1900)/10), m.cat=LL$imale, w.cat=LL$as.winner ), sum) ; dim(f.shh); f.shh[1:10,] write.table(f.shh,file = "aggregated-Lexis-rectangles.txt", sep = " ",quote =FALSE, row.names=FALSE) f.r =aggregate(LL[,c("lex.Xst","lex.dur")], # redelmeier by=list(a.cat=floor(LL$age/5), p.cat=floor((LL$per-1900)/10), m.cat=LL$imale, w.cat=LL$winner.r ), sum) ; f.r[1:10,] write.table(f.r,file = "aggregated-Lexis-rectangles-r.txt", sep = " ",quote =FALSE, row.names=FALSE) exp(coef(glm(lex.Xst~a.cat+m.cat+p.cat+w.cat,family=poisson,offset=log(lex.dur),data=ds))) # mantel haenszel index.cat=f.shh[f.ssh$w.cat==1,]; index.cat[1:3,] # shh ref.cat=f.shh[f.ssh$w.cat==0,]; ref.cat[1:3,] mh=merge(index.cat,ref.cat,by=c("a.cat","p.cat","m.cat"), # side by side records suffixes=c(".w",".n"), sort=TRUE) ; mh[1:10,] hratio.mh= sum( mh$lex.Xst.w * mh$lex.dur.n / (mh$lex.dur.w+mh$lex.dur.n) ) / sum( mh$lex.Xst.n * mh$lex.dur.w / (mh$lex.dur.w+mh$lex.dur.n) ); round(hratio.mh,2) hratio.mh= sum( mh$lex.Xst.w * mh$lex.dur.n / (mh$lex.dur.w+mh$lex.dur.n) ) / sum( mh$lex.Xst.n * mh$lex.dur.w / (mh$lex.dur.w+mh$lex.dur.n) ); o.minus.e =sum( mh$lex.Xst.w - (mh$lex.Xst.w + mh$lex.Xst.n ) * mh$lex.dur.w / (mh$lex.dur.w + mh$lex.dur.n ) ) var =sum( (mh$lex.Xst.w + mh$lex.Xst.n ) * mh$lex.dur.w * mh$lex.dur.n / (mh$lex.dur.w + mh$lex.dur.n )^2 ) # binomial variance of esposed cases | all cases # 1 Poisson conditional on their sum (d1 | d ) # null expectation = d p q where p = pt1/pt c(o.minus.e, var) mh.null.chi.sq = o.minus.e^2/var ; chi = mh.null.chi.sq^0.5 c("ChiSq=",round(mh.null.chi.sq,2), "Chi",round(chi,2), "P-value", round(2*(1-pnorm(chi)),3) ) test.based.ci.95 = round(hratio.mh^(1+1.96*c(1,-1)/chi),2) ; test.based.ci.95 # -- index.cat=f.r[f.r$w.cat==1,]; index.cat[1:3,] # redelmeier ref.cat=f.r[f.r$w.cat==0,]; ref.cat[1:3,] # repeat from mh down