# analysis of oscar data --- jh version 2008.02.24 # read in the dataset ds=read.table("aggregated-Lexis-rectangles.txt", header=TRUE) ######### Rate regression ... aggregated data ################## # lex.Xst = number of deaths in sex-age-period-Oscar "cell" # lex.dur = amount of person time in this cell # m.cat = male category (0 no, 1 yes) # a.cat = age category # p.cat = period (calendar-year) category # w.cat = indicator of performer-time spent as winner # version with age and calendar-year as CATEGORIES summary(glm(lex.Xst ~ m.cat + as.factor(a.cat) + as.factor(p.cat) + w.cat, offset=log(lex.dur),family=poisson,data=ds)) # after inspecting the coefficients, you might want to # want to consider other versions of age and calendar-year ... # So... fill in ... # `Redelmeier' dataset ds.r=read.table("aggregated-Lexis-rectangles-r.txt", header=TRUE) ######### Mantel-Haenszel Rate Ratio ################## # split the dataset into 2 sets of records, # one for the performer time spent as winner index.cat=ds[ds$w.cat==1,]; index.cat[1:3,] # and one for the performer time spent as nominee ref.cat=ds[ds$w.cat==0,]; ref.cat[1:3,] # put these datasets side by side # 2 variables with same name, so use suffix to distinguish them # .w for winner time .n for 'not-winner' (nominee) time # lex.Xst.w will denote deaths in winner-time # lex.dur.w will denote 'as winner' p-t # lex.Xst.n will denote deaths in nominee-time # lex.dur.n will denote 'as nominee' p-t ds.for.mh=merge(index.cat,ref.cat,by=c("a.cat","p.cat","m.cat"), suffixes=c(".w",".n"), sort=TRUE) ; ds.for.mh[1:10,] # calculate #.exposed.cases*unexposedPT/PT # and #.unexposed.cases*exposedPT/PT # ... fill in the formula # and sum these over the sex-age-period strata. # ... fill in the formula # Then take the ratio of the 2 sums # ... fill in the formula # ---- # Variance of log of Mantel-Haenszel MRR estimate (B&D Vol II, eqn 3.17, p109) var.log.IDRmh=function(exposed.cases,exposed.pt,unexposed.cases,unexposed.pt){ cases= exposed.cases + unexposed.cases pt = exposed.pt + unexposed.pt num.mh = sum( exposed.cases*unexposed.pt / pt ) den.mh = sum( unexposed.cases*exposed.pt / pt ) psi.mh = num.mh/den.mh num.var = sum( exposed.pt*unexposed.pt*cases/(pt^2) ) den.var = psi.mh*( sum(exposed.pt*unexposed.pt*cases/ (pt*(unexposed.pt+psi.mh*exposed.pt))) )^2 return( c( psi.mh, num.var/den.var ) ) } # apply... var.log.IDRmh(ds.for.mh$lex.Xst.w, ds.for.mh$lex.dur.w, ds.for.mh$lex.Xst.n, ds.for.mh$lex.dur.n ) # take sqrt of var[log MMR-mh], and compare with # SE[beta_hat] for coefficient of w.cat from Poisson regression model. # They should be quite close.