# 2019 -- non-parametric ... plots etc...

# Avalanche Data -- see article

setwd("/Users/jameshanley/Documents/Courses/bios601/Likelihood")

ds = read.table("avalanche-data.txt", header=T, comment.char="#")
head(ds); tail(ds)
summary(ds$time)

# 2013.10.07 .. ML estimation via optimize (1-para) and optim (multi-para)

# dataset with 549 indiv. obsns

max.live = 2000  # after seeing plot... nobody survived past 1000 min

time = pmin(ds$time, max.live)

# single exponential

exp.logLik = function(mean) {
  sum( ds$status     * log(pexp(time, rate=1/mean)) +
       (1-ds$status) * log(pexp(time, rate=1/mean, lower.tail=FALSE)) )
}

exp.logLik(20)

fit.exp = optimize(exp.logLik, c(1,100), maximum=TRUE)

plot(1:120, pexp(1:120, rate=1/fit.exp$maximum, lower.tail=FALSE),
     type="l", ylim=c(0,1),
     ylab="Estimated Proportion Alive",
     xlab="Minutes since being buried")

# mixture of 2 lognormals or 2 gammas

mixture.logLik = function(par, model="gamma", fitted=FALSE) {

  TIME = 1:240

  logit.F1 = par[1]
  F1 = exp(logit.F1)/(1+exp(logit.F1))

  if(model=="logNormals") {
    mean.log = par[2:3]; log.sd.of.log = par[4:5]
    sd.of.log = exp(log.sd.of.log)
    CDF1 = plnorm(time, mean.log[1], sd.of.log[1])
    CDF2 = plnorm(time, mean.log[2], sd.of.log[2])
    CDF1.TIME = plnorm(TIME, mean.log[1], sd.of.log[1])
    CDF2.TIME = plnorm(TIME, mean.log[2], sd.of.log[2])
  }

  if(model=="gamma") {
    mean.1.stage = exp(par[2:3]); shape = exp(par[4:5])
    CDF1 = pgamma(time, mean.1.stage[1], shape[1])
    CDF2 = pgamma(time, mean.1.stage[2], shape[2])
    CDF1.TIME = pgamma(TIME, mean.1.stage[1], shape[1])
    CDF2.TIME = pgamma(TIME, mean.1.stage[2], shape[2])
  }

  CDF      = F1*CDF1      + (1-F1)*CDF2
  CDF.TIME = F1*CDF1.TIME + (1-F1)*CDF2.TIME

  if(!fitted) return(
    sum( ds$status     * log( F1*CDF1     + (1-F1)*CDF2     ) +
         (1-ds$status) * log( F1*(1-CDF1) + (1-F1)*(1-CDF2) ) ) )

  if(fitted) return( cbind(1-CDF1.TIME, 1-CDF2.TIME, 1-CDF.TIME) )
}

par = c(0, 0,1, 5,5)

mixture.logLik(c(0, 0,1, 3,3), model="logNormals")

fit = optim(c(0, 0,1, 5,5), mixture.logLik, control=list(fnscale=-1),
            model="logNormals", fitted=FALSE)

par = c(0, 0,0, 1,1)

fit = optim(par, mixture.logLik, control=list(fnscale=-1),
            model="gamma", fitted=FALSE)
fit

fitted = mixture.logLik(fit$par, model="gamma", fitted=TRUE)

lines(1:240, fitted[,1], lty="dotted")
lines(1:240, fitted[,2], lty="dotted")
lines(1:240, fitted[,3], lty="dotted", col="red")

# jh sept 23. 2010 + Oct 11, 2011

# older attempts

plot(ds$time, ds$status)
plot(ds$time, ds$status, xlim=c(0,10000))
max(ds$time[ds$status==0])

library(fitdistrplus)  # install it if you don't already have it

# dataset with 549 indiv. obsns

max.live = 2000  # after seeing plot... nobody survived past 1000 min

# fill in the ??? for fitdistrplus (hint: status = 1 means died before extrication)

left  = (ds$status == ???)*ds$time  + (ds$status == ???)*0
right = (ds$status == ???)*max.live + (ds$status == ???)*pmin(ds$time, max.live)

DS = data.frame(left, right); head(DS); tail(DS)

fit.exp = mledist(DS, "exp", start=list(rate=1/500))
fit.exp$convergence
fit.exp$estimate
fit.exp$loglik
1/fit.exp$estimate

# check out shape of hazard fn. for gamma distrn.

y = 1:100  # effective range if mean is set to, say, 20 or so

# fool around with shape to get incr. or decr. hazard

pdf = dgamma(y, shape=1, rate=1/3)
comp.cdf = pgamma(y, shape=1, rate=1/3, lower.tail=FALSE)
hazard = pdf / comp.cdf
plot(y, hazard)

######

mledist(DS, "gamma", start=list(shape=1, rate=1/1000))

# try starting values that do not give too many NAs

fit.gamma = mledist(DS, "gamma", start=list(shape=1, rate=1/10))
fit.gamma$convergence
fit.gamma$estimate
fit.gamma$loglik
fit.gamma$estimate[1]/fit.gamma$estimate[2]   # fitted mean = shape/rate
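
# A quick model comparison of the two mledist fits above (a sketch): the
# exponential is the shape=1 special case of the gamma, so -- once the ???
# are filled in -- twice the gain in log-likelihood is approximately
# chi-squared with 1 df.  'LRT' is a new name, not in the original script.
LRT = 2*(fit.gamma$loglik - fit.exp$loglik)
c(LRT = LRT, p.value = pchisq(LRT, df=1, lower.tail=FALSE))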
# compute fitted hazard fn. -- plug in the mle's

hgamma = function(x, shape, rate)
  dgamma(x, shape, rate) / pgamma(x, shape, rate, lower.tail=FALSE)

t = 1:100
plot(t, dgamma(t, 10, 1/5))
plot(t, hgamma(t, 10, 1.5))

####### by hand, 2010 attempts to be generic ....
####### can borrow the 'from scratch' likelihood, and modify it for
####### gamma, so as to verify the mle's produced by mledist in 2011 !

################ MLE of parameters of 1-para model for avalanche data

# 1-parameter models, with family as an argument and the parameter called theta
# (exponential-based) likelihood: theta = mean, so rate = 1/mean

exponential.log.lik = function(MEAN) {
  prob.died.before.extricated = pexp(ds$time, 1/MEAN)
  prob.died.before.extricated[prob.died.before.extricated > 0.999999] = 0.999999
  # this avoids log of a virtually zero survival probability
  prob.alive.when.extricated = 1 - prob.died.before.extricated
  logL = sum( ds$status     * log(prob.died.before.extricated) +
              (1-ds$status) * log(prob.alive.when.extricated) )
  return(logL)
}

range = seq(1, 500, length=50)
log.lik = unlist(lapply(range, exponential.log.lik)); log.lik
# I have to ask around for a better way to avoid lists and lapply
plot(log(range), log.lik)

# zoom in as needed by changing range
Max.Log.lik = max(log.lik)
MLE = range[log.lik >= Max.Log.lik]; MLE
range = seq(45, 63, length=500)   # finer grid -- re-run the log.lik lines above

# fitted Survival curve
S.curve = 1 - pexp(1:240, 1/MLE)
plot(1:240, S.curve)
plot(1:240, 1 - pgamma(1:240, 1, 1/MLE))   # shape=1 gamma = the same exponential

############### older material.........
############### we will come back to this when we study the Newton-Raphson
############### method of finding the mle

# MLE via Newton-Raphson algorithm NOV 4, 2010
# see http://en.wikipedia.org/wiki/Newton's_method

# in 1-D, if we want to find x.root such that f[x.root] = 0:
#   x.root = x.previous - f[x.previous]/f'[x.previous]
# in our case f[theta] = d logL / d theta
# so we have, in 1 dimension,
#   theta.new = theta.previous
#             - (d logL/d theta)[theta.previous] / (d^2 logL/d theta^2)[theta.previous]
# or, in k dimensions, where
#   d logL/d theta     is a vector of length k (often called the score vector, U),
#   d^2 logL/d theta^2 is a k x k matrix (MINUS this matrix is the observed
#     Information matrix, I, whose inverse estimates the variance):
#   theta.root = theta.previous
#              - Inv{ d^2 logL/d theta^2 }[theta.previous] x (d logL/d theta)[theta.previous]

# EXAMPLE: 1-d, theta = mean of exponential distribution
# logL contribution from i-th data point, a person extricated at time t,
#   = status*log(prob.died.before.t) + (1-status)*log(prob.alive.at.t)
# where prob.alive.at.t = exp(-t/theta) and prob.died.before.t = 1 - exp(-t/theta)
# d logL / d theta
#   = status     * ( d (prob.died.before.t)   / d theta ) / ( prob.died.before.t )
#   + (1-status) * ( d (1-prob.died.before.t) / d theta ) / ( 1-prob.died.before.t )
#   = status     * ( (-1) * exp(-t/theta) * (t/theta^2) ) / ( 1 - exp(-t/theta) )
#   + (1-status) * (        exp(-t/theta) * (t/theta^2) ) / ( exp(-t/theta) )
# with some help from Mathematica

firstD = function(theta) sum(
  (-1)*ds$status * exp(-ds$time/theta) * (ds$time/theta^2) / (1-exp(-ds$time/theta)) +
  (1-ds$status) * (ds$time/theta^2) )

secondD = function(theta) sum(
  (-1)*ds$status * exp(-2*ds$time/theta) * (ds$time^2/theta^4) / (1-exp(-ds$time/theta))^2 +
   (2)*ds$status * exp(  -ds$time/theta) * (ds$time  /theta^3) / (1-exp(-ds$time/theta))   +
  (-1)*ds$status * exp(  -ds$time/theta) * (ds$time^2/theta^4) / (1-exp(-ds$time/theta))   +
  (-2)*(1-ds$status) * (ds$time/theta^3) )

ds$time[ds$time > 6000] = 6000
ds$survive.t = 1 - ds$status
head(ds); tail(ds)

firstD(10); secondD(10)
firstD(20); secondD(20)
firstD(30); secondD(30)
firstD(40); secondD(40)
firstD(50); secondD(50)
firstD(60); secondD(60)
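
# A numerical sanity check (a sketch) on the Mathematica-assisted
# derivatives: compare firstD() and secondD() with central differences of
# exponential.log.lik().  num.firstD and num.secondD are new helper names,
# and the step size h is an arbitrary choice.
num.firstD  = function(theta, h=0.001)
  ( exponential.log.lik(theta+h) - exponential.log.lik(theta-h) ) / (2*h)
num.secondD = function(theta, h=0.001)
  ( exponential.log.lik(theta+h) - 2*exponential.log.lik(theta) +
    exponential.log.lik(theta-h) ) / h^2
cbind(theta      = c(20,40,60),
      U.analytic = sapply(c(20,40,60), firstD),
      U.numeric  = sapply(c(20,40,60), num.firstD),
      H.analytic = sapply(c(20,40,60), secondD),
      H.numeric  = sapply(c(20,40,60), num.secondD))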
plot(20:500, sapply(20:500, firstD))
plot(20:500, sapply(20:500, secondD))

# start with trial value
theta.trial = 20
c(theta.trial, firstD(theta.trial), secondD(theta.trial))

# update (repeat this step until the estimate settles down)
theta.trial = theta.trial - firstD(theta.trial) / secondD(theta.trial)
c(theta.trial, exponential.log.lik(theta.trial),
  firstD(theta.trial), secondD(theta.trial), -1/secondD(theta.trial))

var.theta.hat = -1/secondD(theta.trial)

# double check -- can fit a generalized linear model:
# since Prob[survive] = exp(-t/theta),
# log[ Prob[survive] ] = -t/theta, which has the form of a linear predictor
# with a slope [beta] of -1/theta, and no intercept,
# so can use Bernoulli regression with a log link

fit.exp = glm(ds$survive.t ~ -1 + ds$time, family=binomial(link="log"))
summary(fit.exp)
-1/fit.exp$coefficients    # this is theta.hat
summary(fit.exp)$cov.unscaled

# double check variances from the 2 approaches...
# var(1/theta.hat) = (approx) var(theta.hat) x square of jacobian
#                  = var(theta.hat) x [ (-1/theta.hat^2) ]^2
var.theta.hat * (1/theta.trial^2)^2
# not sure why they do not match EXACTLY ..
# (the delta method is only approximate, and theta.trial may not be fully converged)

###################### MLE of parameters of various 2-para models for avalanche data ####

general2para.log.lik = function(family, para1, para2) {
  if(family=="gamma") { rate = 1/para2   # para2 = scale, so rate = 1/scale
    cdf = function(t, para1, para2) pgamma(t, para1, rate) }
  if(family=="beta")
    cdf = function(t, para1, para2) pbeta(t, para1, para2)
  if(family=="lognormal")
    cdf = function(t, para1, para2) plnorm(t, para1, para2)   # mean.log, sd.log
  prob.died.before.extricated = cdf(ds$time, para1, para2)
  prob.died.before.extricated[prob.died.before.extricated > 0.999999] = 0.999999
  # this avoids log of a virtually zero survival probability
  prob.alive.when.extricated = 1 - prob.died.before.extricated
  logL = sum( ds$status     * log(prob.died.before.extricated) +
              (1-ds$status) * log(prob.alive.when.extricated) )
  return(logL)
}

# numerical (forward-difference) score vector and hessian;
# NOTE: these redefine the firstD and secondD of the exponential section

firstD = function(family, para1, epsilon1, para2, epsilon2) {
  Dpara1 = ( general2para.log.lik(family, para1+epsilon1, para2) -
             general2para.log.lik(family, para1,          para2) ) / epsilon1
  Dpara2 = ( general2para.log.lik(family, para1, para2+epsilon2) -
             general2para.log.lik(family, para1, para2         ) ) / epsilon2
  return( c(Dpara1, Dpara2) )
}

secondD = function(family, para1, epsilon1, para2, epsilon2) {
  Dpara1 = ( firstD(family, para1+epsilon1, epsilon1, para2, epsilon2) -
             firstD(family, para1,          epsilon1, para2, epsilon2) ) / epsilon1
  Dpara2 = ( firstD(family, para1, epsilon1, para2+epsilon2, epsilon2) -
             firstD(family, para1, epsilon1, para2,          epsilon2) ) / epsilon2
  return( matrix(c(Dpara1, Dpara2), nrow=2) )
}

# start with trial values
family = "lognormal"; family = "gamma"
general2para.log.lik(family, 4, 5)

shape = 2; scale = 5
theta = c(shape, scale)
theta
firstD(family, shape, 0.001, scale, 0.001)
secondD(family, shape, 0.001, scale, 0.001)
general2para.log.lik(family, shape, scale)

# update (repeat from here until the estimates settle down)
theta = theta - firstD(family, shape, 0.001, scale, 0.001) %*%
                solve( secondD(family, shape, 0.001, scale, 0.001) )
theta
shape = theta[1]; scale = theta[2]; shape*scale   # fitted mean = shape x scale
firstD(family, shape, 0.001, scale, 0.001)
secondD(family, shape, 0.001, scale, 0.001)
general2para.log.lik(family, shape, scale)
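
# A sketch that automates the 'repeat the update' step above with a loop,
# stopping when the Newton-Raphson step is small.  The starting values,
# epsilons, and tolerance are arbitrary choices, and there is no
# step-halving safeguard, so plain NR can wander off from poor starts.
theta = c(2, 5)   # (shape, scale)
for(iter in 1:50) {
  U = firstD(family, theta[1], 0.001, theta[2], 0.001)
  H = secondD(family, theta[1], 0.001, theta[2], 0.001)
  step = solve(H, U)           # H^{-1} U, without forming the inverse
  theta = theta - step
  if( max(abs(step)) < 1e-6 ) break
}
c(iterations=iter, shape=theta[1], scale=theta[2],
  logL = general2para.log.lik(family, theta[1], theta[2]))
# (shape, 1/scale) should roughly agree with fit.gamma$estimate, the
# (shape, rate) pair from the fitdistrplus section -- the stated point of
# this 'by hand' exercise; small differences are expected since the two
# likelihoods handle the censoring limits slightly differently.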
########## non parametric ......

left  = (ds$status==0)*ds$time + (ds$status==1)*0
right = (ds$status==0)*2000    + (ds$status==1)*pmin(ds$time, 2000)
cbind(left, right, ds$time, ds$status)

support = seq(0.5, 1999.5, 1); L = length(support); L
N = length(left); N

m = matrix(rep(NA, L*N), nrow=N)
for (i in 1:length(support)) m[,i] = 1*(support[i] > left & support[i] < right)
m[,1:10]
sums = apply(m, 2, "sum")
# plot(support, sums, type="l")

# eliminate redundant support points (ie identical columns)

fromLeft = FALSE

if(fromLeft) {
  M = m[,1]; t = support[1]
  for(col in 2:L) if( sum( m[,col]==m[,col-1] ) < N ) {
    t = c(t, support[col])
    M = cbind(M, m[,col])
  }
  n.atoms = length(t)
}

if(!fromLeft) {
  M = m[,L]; t = support[L]
  for(col in (L-1):1) if( sum( m[,col]==m[,col+1] ) < N ) {
    t = c(t, support[col])
    M = cbind(M, m[,col])
  }
  n.atoms = length(t)
  t = t[n.atoms:1]
  M = M[, n.atoms:1]   # reverse the COLUMNS, so the atoms run left to right
}

M[,1:20]
n.atoms
SUMS = apply(M, 2, "sum"); SUMS

plot(-10, -1000, ylim=c(0,430), xlim=c(0,150),
     xlab="Minute", ylab="Observation")
for(i in seq(1, 422, 10)) {
  segments(left[i], i, right[i], i, lwd=2,
           col=c("red","green")[1 + 1*(left[i] > 0)])
  tt = t[t >= left[i] & t <= right[i]]
  points(tt, rep(i, length(tt)), cex=0.5, pch=19)
}

# self-consistency

# start with ...
s.hat = 1 - cumsum(SUMS)/sum(SUMS)
# plot(t, s.hat, type="l")

index.of.last.support.point.before.each.extraction =
  unlist(lapply(ds$time, function(T) sum(t < T)))

n.known.gt.t = unlist(lapply(t, function(T) sum(ds$status==0 & ds$time > T)))

for(iteration in 1:100) {

  all.s.hats = s.hat[index.of.last.support.point.before.each.extraction]

  n.presumed.gt.t = unlist( lapply( t[1:n.atoms], function(T) {
    #print("T"); print(T)
    which.of.n.atoms = (1:n.atoms)[t == T]
    #print("which.of.n.atoms"); print(which.of.n.atoms)
    S = s.hat[which.of.n.atoms]
    #print("S"); print(S)

    # extricated alive before T
    extracted.alive.before.T = (ds$status==0 & ds$time < T)
    indices.of.these = (1:N)[extracted.alive.before.T]
    #print("indices.of.these"); print(indices.of.these)
    S.for.each.of.these = all.s.hats[indices.of.these]
    #print("S.for.each.of.these"); print(S.for.each.of.these)
    prob = S / S.for.each.of.these
    #print("probs"); print(prob)
    n.before.expect.gt.t = sum(prob)

    # extricated dead after T
    #print("AFTER")
    extracted.dead.after.T = (ds$status==1 & ds$time > T)
    indices.of.these = (1:N)[extracted.dead.after.T]
    #print("indices.of.these"); print(indices.of.these)
    S.for.each.of.these = all.s.hats[indices.of.these]
    #print("S.for.each.of.these"); print(S.for.each.of.these)
    prob = (S - S.for.each.of.these) / (1 - S.for.each.of.these)
    #print("probs"); print(prob)
    n.after.that.were.gt.t = sum(prob)

    n.fitted = n.before.expect.gt.t + n.after.that.were.gt.t
    return(n.fitted)
  } ) )
  #n.presumed.gt.t

  new.s.hat = (n.known.gt.t + n.presumed.gt.t)/N
  print( max( abs( new.s.hat - s.hat ) ) )
  s.hat = new.s.hat
}

plot(t, s.hat, type="l", xlim=c(0,165))
abline(v=seq(0,100,10))

prob.mass = c(1, new.s.hat[1:(n.atoms-1)]) - new.s.hat
sum(prob.mass)

plot(-10, -1000, ylim=c(0,430), xlim=c(0,150),
     xlab="Minute", ylab="Observation")
for(i in seq(1, 422, 10)) {
  segments(left[i], i, right[i], i, lwd=2,
           col=c("red","green")[1 + 1*(left[i] > 0)])
  tt   = t[t >= left[i] & t <= right[i]]
  size = prob.mass[t >= left[i] & t <= right[i]]
  points(tt, rep(i, length(tt)), cex=5*size, pch=19)
}
abline(v=seq(0,100,10))

############

plot(t, s.hat, type="l", xlim=c(0,165))

delta.t = t - c(0, t[1:(n.atoms-1)]); delta.t; sum(delta.t)
integral.s.hat = sum(s.hat * delta.t); integral.s.hat

delta.s.hat = c(1, s.hat[1:(n.atoms-1)]) - s.hat; delta.s.hat
pdf.hat = delta.s.hat / delta.t
plot(t, pdf.hat, type="l", xlim=c(0,100))
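
# Cross-check (a sketch): survfit() in the survival package computes the
# Turnbull NPMLE for interval-censored data, which the self-consistency
# iterations above should approximate.  With type="interval2", an NA marks
# the unknown side of the interval; 'lower', 'upper' and 'turnbull' are new
# names introduced here.
library(survival)
lower = ifelse(ds$status==1, NA, ds$time)               # died: left-censored at t
upper = ifelse(ds$status==1, pmin(ds$time, 2000), NA)   # alive: right-censored at t
turnbull = survfit( Surv(lower, upper, type="interval2") ~ 1 )
plot(t, s.hat, type="l", col="red", xlim=c(0,165))
lines(turnbull$time, turnbull$surv, type="s")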
h.hat = pdf.hat / s.hat
plot(t, h.hat, type="l", xlim=c(0,100))

######

# library(Icens)
# package 'Icens' is not available on CRAN (for R version 3.6.1);
# it is distributed via Bioconductor, e.g. BiocManager::install("Icens")

data = matrix(c(left, right), nrow=N); data
EM = EMICM(data, EMstep=TRUE, ICMstep=TRUE, keepiter=FALSE,
           tol=1e-07, maxiter=1000)
sum(EM$pf)

############

# https://www.r-project.org/conferences/useR-2010/tutorials/Fay_1.pdf
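
# Following up on the EMICM fit above (a sketch): for the usual 'icsurv'
# return value, EM$pf holds the probability masses and EM$intmap the
# corresponding Turnbull intervals (a 2 x k matrix of endpoints), so the
# EMICM survival curve can be laid over the self-consistency one.
plot(EM$intmap[2,], 1 - cumsum(EM$pf), type="s", xlim=c(0,165),
     xlab="Minute", ylab="Estimated Proportion Alive")
lines(t, s.hat, col="red")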