# jh 2013.10.08
setwd("/Users/jameshanley/Dropbox/Courses/Statistical Models by D Clayton & M Hills/Notes/ch05/appendicitis")

ds=read.table("app.txt",header=TRUE)
head(ds); tail(ds)

ds$years.at.risk=pmin(ds$agein80,ds$onset)
length(ds$years.at.risk)
mean(ds$years.at.risk)
mean(ds$agein80) ; sum(ds$app)
table(ds$agein80)

py=rep(0,88) ; n.events=rep(0,88) ; age=1:88
for (a in 1:88) py[a] = sum(ds$years.at.risk >= a)
t = aggregate(ds$app,by=list(age=ds$years.at.risk),sum)
n.events[t$age] = t$x
rate = n.events/py


par(mar=c(5,5,5,5))
plot(age,rate,ylab="Cases/PersonYear")
points( 1:88, filter(n.events,rep(1/5,5)) / 
             filter(py,rep(1/5,5)), type="l",pch=19 )
library(splines)
fit=glm(n.events~ns(age,df=5),family=poisson,offset=log(py))
fitted.rates = fit$fitted.values/py
lines(age,fitted.rates,col="blue")
cum.hazard =cumsum(fitted.rates)
risk = 1 - exp(- cum.hazard)

par(new=TRUE)
plot(age,risk, type="l",col="red",xaxt="n",
  yaxt="n",ylab="", ylim=c(0,1))
axis(4)
mtext("Risk",col="red",side=4)

library(survival)

km=survfit(Surv(ds$years.at.risk, ds$app) ~ 1)
lines(km$time,1-km$surv,lwd=2)


par(new=TRUE)
plot(age,py, type="l",col="green",xaxt="n",
  yaxt="n",ylab="")
axis(4)
mtext("Persons At Risk",col="green",side=4)


library(splines)

fit=glm(rate~ns(age,df=3),poisson)

lines(age,fit$fitted.values)