# jh ... late Sept 2010 + Nov 1, 2010 + Oct 6, 2011 + Oct 1, 2013
# user will need to fill in the blanks ...

# Tumbler data -- see Table 1
# illustration of age-distributions etc.
# using (almost) the MLEs from the article

shape = 1.6; rate = 0.183

n.channels = 60
n.weeks    = 78

# grey ramp for printing ages: the older the age, the darker the grey
COLOUR = gray( ((78:1)/140)^2 ); COLOUR

# history[channel, week] = age (weeks in service) of the tumbler in place;
# status[channel, week]  = 1 in the week that tumbler fails, 0 otherwise
history = matrix(rep(NA, n.channels*n.weeks), nrow=n.channels)
status  = matrix(rep(NA, n.channels*n.weeks), nrow=n.channels)

start.week = rep( 1, n.channels)
end.week   = rep(-1, n.channels)

uptime = 0; n.tumblers = 0

for (channel in 1:n.channels) {

  # keep replacing failed tumblers until the 78-week window is filled
  while (start.week[channel] <= n.weeks) {

    service = ceiling(rgamma(1, shape=shape, rate=rate))

    n.tumblers = n.tumblers + 1
    uptime     = uptime + service

    STATUS = rep(0, service); STATUS[service] = 1

    end.week[channel] = start.week[channel] + service - 1

    if (end.week[channel] > n.weeks) {   # last tumbler is censored at week 78
      end.week[channel] = n.weeks
      service = (n.weeks - start.week[channel]) + 1
      STATUS  = STATUS[1:service]
    }

    # print(c(start.week[channel], end.week[channel], service))

    history[channel, start.week[channel]:end.week[channel]] = 1:service
    status[ channel, start.week[channel]:end.week[channel]] = STATUS

    start.week[channel] = end.week[channel] + 1
  }
}

print(c(n.tumblers, uptime, round(uptime/n.tumblers, 1)))

par(mfrow=c(1,1), mar=c(0.1,0.1,0.1,0.1), oma=c(0.1,0.1,0.1,0.1))

plot(c(-1, n.weeks+1), c(-1, n.channels+2), xlab="week", ylab="channel",
     xlim=c(-1, n.weeks+1), ylim=c(-1, n.channels+1), col="white")

text(n.weeks/2, -2, "Week:", cex=.65, col="blue",   family="mono", adj=c(1,0.5))
text(-1, n.channels/2, "Item:", cex=.65, col="purple", family="mono", adj=c(1,0.5))

for (week in 1:n.weeks) {

  text(week, n.channels+1.5, toString(week), cex=.65, family="mono",
       col="blue", adj=c(1,0.5))
  text(week, -1, toString(week), cex=.65, col="blue", family="mono", adj=c(1,0.5))

  for (channel in 1:n.channels) {

    if (week == 1) text(0, channel, toString(n.channels+1-channel), cex=.7,
                        col="purple", family="mono", adj=c(1,0.5))

    age = history[channel, week]

    if (status[channel, week] == 0) text(week, channel, toString(age), cex=0.65,
                                         col=COLOUR[age], family="mono", adj=c(1,0.5))
    if (status[channel, week] == 1) text(week, channel, toString(age), cex=0.65,
                                         col="red", family="mono", adj=c(1,0.5))
  }
}

age.last.week = history[, n.weeks]; age.last.week
hist(age.last.week)
mean(age.last.week)

################ MLE of parameters of gamma model for tumbler data

from = c(0:19, 20, 25)
to   = c(1:20, 25, 30)

exposed = c(549,521,470,415,371,331,285,247,210,182,163,
            139,123,114, 95, 78, 65, 56, 49, 37, 31, 14)
broken  = c( 23, 38, 48, 40, 36, 42, 35, 33, 26, 17, 22,
             16,  8, 19, 17, 13,  8,  7, 12,  5, 17, 10)

cbind(from, to, exposed, broken)

# censored at the end of bin i = those exposed but not broken in bin i
# who do not reappear among those exposed in bin i+1
censored       = rep(NA, 22)
censored[1:21] = (exposed[1:21] - broken[1:21]) - exposed[2:22]
censored[22]   = exposed[22] - broken[22]; sum(censored)

cbind(from, to, exposed, broken, censored)

plot(to, censored, type="h")

# check: broken + censored should account for all 549 tumblers
sum(censored) + sum(broken)

# (gamma-based) likelihood over a 2-d parameter grid

# unconditional approach
# L-contribution = prob(fail in bin i), or
#                  prob(survived to end of bin i, but not sure how much longer),
#                  i.e. prob(T > censoring time)
# i.e. the pdf area above the base of the bin, or to the right of the end
# of the last bin survived

log.likU = function(shape, scale) {
  prob.fail.in.interval          = ...
  prob.survive.past.end.interval = ...
  logL = sum( broken  *log(prob.fail.in.interval) +
              censored*log(prob.survive.past.end.interval) )
  return(logL)
}
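# --- one possible completion of the blanks above (a sketch, not necessarily
# the intended answer): with bin i covering (from[i], to[i]], both
# probabilities come straight from the gamma CDF pgamma().
# The name log.likU.sketch is introduced here for illustration only.
log.likU.sketch = function(shape, scale) {
  # prob(T in (from, to]) : pdf area above the base of the bin
  prob.fail.in.interval          = pgamma(to,   shape=shape, scale=scale) -
                                   pgamma(from, shape=shape, scale=scale)
  # prob(T > to) : pdf area to the right of the end of the bin
  prob.survive.past.end.interval = 1 - pgamma(to, shape=shape, scale=scale)
  logL = sum( broken  *log(prob.fail.in.interval) +
              censored*log(prob.survive.past.end.interval) )
  return(logL)
}
# e.g. log.likU.sketch(1.6, 1/0.183)   # near the (almost) MLEs quoted at the top
# (for the rate reparametrization further down, the analogous fill-in
#  would use pgamma(..., rate=rate) instead of scale=scale)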
log.likU(2, 5)
log.likU(1.6, 1/0.183)

log.lik.vector = Vectorize( log.likU, c("shape", "scale") )

shape = seq(1.1, 1.9, length=50); scale = seq(4, 8, length=50)
log.lik.values = outer(shape, scale, log.lik.vector)

contour( shape, scale, log.lik.values, xlab="shape", ylab="scale", labcex=1.2)

library(rgl)
open3d()
SCALE = rep(scale, each =length(shape))
SHAPE = rep(shape, times=length(scale))
MEAN  = SHAPE*SCALE
plot3d( SHAPE, SCALE, log.lik.values, size=2, col="blue")
plot( MEAN, log.lik.values)

# message: we can estimate the mean quite well, but each of its 2 components
# (mean = shape * scale) less well

# added Nov 1, 2010

# conditional approach used by the authors
# (but note that they did not get the exact likelihood)

log.likC = function(shape, scale) {
  prob.fail.in.interval               = ...
  prob.survive.past.end.prev.interval = ...
  Condnl.prob.fail.in.interval = prob.fail.in.interval /
                                 prob.survive.past.end.prev.interval
  Condnl.prob.survive.interval = 1 - Condnl.prob.fail.in.interval
  logL = sum( broken          *log(Condnl.prob.fail.in.interval) +
              (exposed-broken)*log(Condnl.prob.survive.interval) )
  return(logL)
}
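# --- one possible completion of the blanks above (a sketch, using the same
# gamma-CDF fill-ins as before; log.likC.sketch is an illustrative name only):
log.likC.sketch = function(shape, scale) {
  prob.fail.in.interval               = pgamma(to,   shape=shape, scale=scale) -
                                        pgamma(from, shape=shape, scale=scale)
  # surviving past the end of the previous bin = past the start of this one
  prob.survive.past.end.prev.interval = 1 - pgamma(from, shape=shape, scale=scale)
  Condnl.prob.fail.in.interval = prob.fail.in.interval /
                                 prob.survive.past.end.prev.interval
  Condnl.prob.survive.interval = 1 - Condnl.prob.fail.in.interval
  logL = sum( broken          *log(Condnl.prob.fail.in.interval) +
              (exposed-broken)*log(Condnl.prob.survive.interval) )
  return(logL)
}
# with these fill-ins the two sketch versions agree exactly, e.g.
# log.likU.sketch(2, 5); log.likC.sketch(2, 5)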
log.likU(2, 5);   log.likC(2, 5)
log.likU(1.5, 7); log.likC(1.5, 7)

# can check that the 2 versions, unconditional and conditional,
# agree everywhere on the (shape, scale) parameter region
# ......
# ...... etc

# can also check algebraically why this is so .................
# in the unconditional approach, L is the product
#
#   L = f1^(n_1) . S1^(c_1) . f2^(n_2) . S2^(c_2) . etc ...
#
# where f_i = unconditional prob. of failing in bin i (the f's sum to 1)
#       S_i = prob[ fail after end of bin i ] (a decreasing series, with S0 = 1)
#       n_i = no. that fail in bin i, and c_i = no. censored at the end of bin i
#
# L can be rearranged as
#
#     (f1/S0)^(n_1) . (1 - f1/S0)^( [n_1+n_2+...] + [c_1+c_2+...] - n_1 )
#   . (f2/S1)^(n_2) . (1 - f2/S1)^( [n_2+n_3+...] + [c_2+c_3+...] - n_2 )
#   . (f3/S2)^(n_3) . (1 - f3/S2)^( [n_3+n_4+...] + [c_3+c_4+...] - n_3 )
#   . etc ...
#
# (note that 1 - f_i/S_{i-1} = S_i/S_{i-1}) -- which is the conditional form

###### added Oct 6, 2011 : reparametrization using rate rather than scale

# again, the unconditional (more traditional) approach

log.likU.rate = function(shape, rate) {
  prob.fail.in.interval          = ...
  prob.survive.past.end.interval = ...
  logL = sum( broken  *log(prob.fail.in.interval) +
              censored*log(prob.survive.past.end.interval) )
  return(logL)
}

log.likU.rate(2, 1/7)

log.lik.vector.rate = Vectorize( log.likU.rate, c("shape", "rate") )

shape = seq(1.2, 1.9, length=50); rate = seq(1/7, 1/4, length=50)
log.lik.values = outer(shape, rate, log.lik.vector.rate)

contour( shape, rate, log.lik.values, xlab="shape", ylab="rate", labcex=1.2)

###### independent fit .. via the fitdistrplus package

library(fitdistrplus)

# dataset with 549 individual observations

censored       = rep(NA, 22)
censored[1:21] = (exposed[1:21] - broken[1:21]) - exposed[2:22]
censored[22]   = exposed[22] - broken[22]; sum(censored)

cbind(broken, censored)

# each broken tumbler is interval-censored within its bin (left, right);
# each censored one is right-censored at the end of its bin
# (1000 stands in for "infinity")
left = c(); right = c()
for (i in 1:length(broken)) {
  left  = c(left,  rep(from[i], broken[i]))
  right = c(right, rep(to[i],   broken[i]))
  left  = c(left,  rep(to[i],   censored[i]))
  right = c(right, rep(1000,    censored[i]))
}

ds = data.frame(left, right); head(ds); tail(ds)

mledist(ds, "gamma")
log.likU.rate(1.578241, 0.1690927)

################## rather than shape and scale, log.shape & log.scale ######

# unconditional approach

log.likU.logParas = function(log.shape, log.scale) {
  prob.fail.in.interval          = ...
  prob.survive.past.end.interval = ...
  logL = sum( broken  *log(prob.fail.in.interval) +
              censored*log(prob.survive.past.end.interval) )
  return(logL)
}

log.likU.logParas(log(2), log(5))

log.lik.vector.logParas = Vectorize( log.likU.logParas, c("log.shape", "log.scale") )

log.shape = log(seq(1.4, 1.7, length=50)); log.scale = log(seq(5, 7, length=50))
log.lik.values.logParas = outer(log.shape, log.scale, log.lik.vector.logParas)

contour( log.shape, log.scale, log.lik.values.logParas,
         xlab="log.shape", ylab="log.scale", labcex=1.2)
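# --- one possible completion of the log-parameter blanks (a sketch):
# just exponentiate back to the natural parameters, then fill in as before.
log.likU.logParas.sketch = function(log.shape, log.scale) {
  shape = exp(log.shape); scale = exp(log.scale)
  prob.fail.in.interval          = pgamma(to,   shape=shape, scale=scale) -
                                   pgamma(from, shape=shape, scale=scale)
  prob.survive.past.end.interval = 1 - pgamma(to, shape=shape, scale=scale)
  logL = sum( broken  *log(prob.fail.in.interval) +
              censored*log(prob.survive.past.end.interval) )
  return(logL)
}
# log.likU.logParas.sketch(log(2), log(5)) should match log.likU.sketch(2, 5)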