# set up p-t dataset for analysis of oscar data   --- jh 2008.01.18

library(Epi)

all.nominated=read.table("nomin8ed.csv",header=T,sep=",")

# deaths and p-t

sum(all.nominated$dead)
sum(all.nominated$lastyr-all.nominated$nom1yr)

# person time as a nominee : create last year as nominee

# range=1:700
 range=1:length(all.nominated$born)

nom = all.nominated[range,] ;
nom = nom[(nom$win1yr < 0) | (nom$win1yr > nom$nom1yr),] ; 
nom$as.winner=0
nom$start=nom$nom1yr
nom$lastyr = nom$win1yr*(nom$win1yr>0)+nom$lastyr*(nom$win1yr<0);
nom$dead   = 0*(nom$win1yr>0)+nom$dead*(nom$win1yr<0);

nom[1:5,]  
sum(nom$dead)

# person time as a winner 

win = all.nominated[range,]; 
win=win[win$win1yr > 0,]; 
win$as.winner=1
win$start=win$win1yr ;

win[1:5,] 
sum(win$dead)

# join

all = rbind( nom, win )

summary(all$lastyr - all$start)

# 2 people died same year they were nominated..

all$lastyr = all$lastyr+0.5*((all$start==all$lastyr) & (all$dead==1) )

all[1:5,]

summary(all$lastyr - all$start)

# Define as Lexis object with timescales calendar time and age

L=Lexis(entry=list(per=start),exit=list( per=lastyr, age=lastyr-born),exit.status = dead,data=all)

# plot Lexis diagram

   plot(L)

# summary

  summary(L$lex.dur)

# split into 1 yr of age x 10 years of calendar Lexis rectangles grid

  delta.age=1;
  delta.year=10;

  LL <- splitLexis(  L, breaks = seq(1920,2005,delta.year), time.scale="per")
  LL <- splitLexis( LL, breaks = seq(0,105,delta.age), time.scale="age" )

  names(LL)
  
# sylvestre/huszti/hanley (shh) .. a winner from when performer won...

# redelmeier              (.r)  .. a winner is a winner from outset.....  
# ie classify PERSONS, not person-years (even retrospectively!)
  
  LL$winner.r = 1*(LL$win1yr > 0)

# George Burns(514), Richard Burton, and some other dead guys (and some alive)
  
  ID=514
  LL[LL$id==ID,c("id","imale","born","lastyr","per","age",
                              "nom1yr","win1yr",
                              "winner.r.version","as.winner","lex.dur")]

# export data

write.table(LL[,c(1:4,6:9,14:20)],
  file = "Individual-time-slices.txt", sep = " ",
  quote =FALSE, row.names=FALSE)


# Tabulate the deaths and the person-years, overall and by rectangle# as per package
  
 # using aggregate function applied directly to LL 
 
  f.shh=aggregate(LL[,c("lex.Xst","lex.dur")],       # shh 
        by=list(a.cat=floor(LL$age/5),
                p.cat=floor((LL$per-1900)/10),
                m.cat=LL$imale,
                w.cat=LL$as.winner ),
        sum) ; 
        
       dim(f.shh); f.shh[1:10,]
       
  write.table(f.shh,file = "aggregated-Lexis-rectangles.txt", 
   sep = " ",quote =FALSE, row.names=FALSE)
  
  f.r  =aggregate(LL[,c("lex.Xst","lex.dur")],      # redelmeier
        by=list(a.cat=floor(LL$age/5),
                p.cat=floor((LL$per-1900)/10),
                m.cat=LL$imale,
                w.cat=LL$winner.r ),
        sum) ; 
        
        f.r[1:10,]
   
   write.table(f.r,file = "aggregated-Lexis-rectangles-r.txt", 
   sep = " ",quote =FALSE, row.names=FALSE)

exp(coef(glm(lex.Xst~a.cat+m.cat+p.cat+w.cat,family=poisson,offset=log(lex.dur),data=ds)))
   
# mantel haenszel
  
  index.cat=f.shh[f.ssh$w.cat==1,]; index.cat[1:3,]     # shh
    ref.cat=f.shh[f.ssh$w.cat==0,];   ref.cat[1:3,]
      

  mh=merge(index.cat,ref.cat,by=c("a.cat","p.cat","m.cat"), # side by side records
           suffixes=c(".w",".n"), sort=TRUE) ;   mh[1:10,]
  
  hratio.mh= sum( mh$lex.Xst.w * mh$lex.dur.n / (mh$lex.dur.w+mh$lex.dur.n) ) /
             sum( mh$lex.Xst.n * mh$lex.dur.w / (mh$lex.dur.w+mh$lex.dur.n) );
  
  round(hratio.mh,2)
  
  hratio.mh= sum( mh$lex.Xst.w * mh$lex.dur.n / (mh$lex.dur.w+mh$lex.dur.n) ) /
             sum( mh$lex.Xst.n * mh$lex.dur.w / (mh$lex.dur.w+mh$lex.dur.n) );
              
  o.minus.e =sum( mh$lex.Xst.w - (mh$lex.Xst.w  +  mh$lex.Xst.n ) *
                                  mh$lex.dur.w  / (mh$lex.dur.w + mh$lex.dur.n )   ) 
  
  var       =sum( (mh$lex.Xst.w + mh$lex.Xst.n ) * mh$lex.dur.w * mh$lex.dur.n / 
                                  (mh$lex.dur.w + mh$lex.dur.n )^2   ) 
                                  
                                  # binomial variance of esposed cases | all cases
                                  # 1 Poisson conditional on their sum (d1 | d )
                                  # null expectation = d p q where p = pt1/pt
                   
  c(o.minus.e, var)
  
  mh.null.chi.sq = o.minus.e^2/var ; chi = mh.null.chi.sq^0.5 
   
  c("ChiSq=",round(mh.null.chi.sq,2), "Chi",round(chi,2), "P-value", 
    round(2*(1-pnorm(chi)),3) ) 
  
  test.based.ci.95 = round(hratio.mh^(1+1.96*c(1,-1)/chi),2) ; test.based.ci.95   
     
  # --
  
  index.cat=f.r[f.r$w.cat==1,]; index.cat[1:3,]       # redelmeier
  ref.cat=f.r[f.r$w.cat==0,];   ref.cat[1:3,]         # repeat from mh down