# Current Status Data -- see article # 2011.11.11, and 2011.11.17 -- thanks to David -- error corrected # probs = new.s.hat[index.of.last.support.point.before.each.extraction] ^right.cens * # (1-new.s.hat[index.of.last.support.point.before.each.extraction])^left.cens # if want each prob to be one or the other, depending on censoring # must use '*' rather than the '+' I had... # probs = new.s.hat[index.of.last.support.point.before.each.extraction] ^right.cens + # (1-new.s.hat[index.of.last.support.point.before.each.extraction])^left.cens # or , instead of powers , I could have added 2 products... # # PROBS = new.s.hat[index.of.last.support.point.before.each.extraction] * right.cens + # (1-new.s.hat[index.of.last.support.point.before.each.extraction])* left.cens setwd("/Users/jameshanley/Documents/Courses/bios601/Likelihood") source("GetCurrentStatusData.R") # for primes topic="primes" #topic="avalanche" if(topic=="primes") max.n=29 if(topic=="avalanche") max.n=1999 if(topic=="primes") { ds=GetCurrentStatusData(max.n) ; every=1; max.T=max.n } if(topic=="avalanche") { ds = read.table("avalancheDataForR.txt"); every=10; max.T=90} head(ds) ; tail(ds) ##### change the 10 ds$left and 10 ds$right values to those in the assignmant.. ##### ...... support = seq(0.5,max.n+0.5,1) ; L=length(support) ; L N = length(ds$left); N m=matrix(rep(NA,L*N),nrow=N) for (i in 1:length(support)) m[,i] = 1*(support[i] > ds$left & support[i] < ds$right) ; m[1:10,1:10] support.point.is.in.this.many.intervals = apply(m,2,"sum") ; plot(support,support.point.is.in.this.many.intervals,type="l", xlab="support point", xlim=c(0,max.T), ylim=c(0,max(support.point.is.in.this.many.intervals))) # eliminate redundant support points (ie identical columns) # (further elimination possible, but not done) fromLeft=FALSE if(fromLeft) { M=m[,1] ; t=support[1]; for(col in 2:L) if( sum( m[,col]==m[,col-1] ) < N) {t=c(t,support[col]) ; M = cbind(M,m[,col]) } n.atoms = length(t) } if(!fromLeft) { M=m[,L] ; t=support[L] for(col in (L-1):1) if( sum( m[,col]==m[,col+1] ) < N) { t=c(t,support[col]) ; M = cbind(M,m[,col]) } n.atoms = length(t) t = t[n.atoms:1] M= M[,n.atoms:1] } n.atoms ; t SUPPORT.point.is.in.this.many.intervals = apply(M,2,"sum") ; SUPPORT.point.is.in.this.many.intervals par(mfrow=c(2,1),mar=c(2,2,0.1,0.1),oma=c(0.1,0.1,0.1,0.1)) plot(c(0,max.n+1),c(-N/10,1.15*(N+1)),col="white",xlim=c(0,max.T)) for(i in seq(1,N,every)) segments(ds$left[i], i, ds$right[i], i, col="grey50", lwd=0.5) for(i in 1:n.atoms) { points(c(t[i]), c(-0.5), pch=19, cex=0.25 ) text(t[i], -N/20, toString(SUPPORT.point.is.in.this.many.intervals[i]), adj=c(1,0),cex=0.6, srt = 90 ) for(j in seq(1,N,every) ) if(M[j,i]==1) points(c(t[i]), c(j), pch=19, cex=0.1 ) } # see where could reduce further drop=rep(FALSE,n.atoms) for (i in 1:(n.atoms-1) ) if( SUPPORT.point.is.in.this.many.intervals[i+1] > SUPPORT.point.is.in.this.many.intervals[i] ) drop[i]=TRUE drop for (i in n.atoms:2 ) if( SUPPORT.point.is.in.this.many.intervals[i-1] > SUPPORT.point.is.in.this.many.intervals[i] ) drop[i]=TRUE drop points(t[!drop], rep(-0.5,sum(!drop)), pch=17,col="red",cex=0.5) points(c(0), c(1.05*N), pch=17,col="red",cex=0.5) text(1,1.05*N,"minimal set ?", adj=c(0,0.5),col="red",cex=0.7) points(c(0), c(1.12*N), pch=19,cex=0.5) text(1,1.12*N,"support points", adj=c(0,0.5),cex=0.7) # self-consistency inspection.time = ds$left*(ds$left>0) + ds$right*(ds$left==0) left = ds$left right = ds$right # start with ... left.cens = 1*(inspection.time==right) right.cens = 1*(inspection.time==left) s.hat = 1-cumsum(SUPPORT.point.is.in.this.many.intervals)/ sum(SUPPORT.point.is.in.this.many.intervals) # plot(c(0,t),c(1,s.hat),type="s", ylim=c(0,1)) index.of.last.support.point.before.each.extraction = unlist(lapply(inspection.time, function(T) sum(t < T) )) n.known.gt.t=unlist(lapply(t, function(T) sum(inspection.time== left & inspection.time > T) )) n.iterations=10 LL= rep(NA,n.iterations) S.hat.matrix=matrix(rep(NA,50*n.atoms),nrow=50) # first 50 iterations... S.hat.matrix[1,]=s.hat for(iteration in 1:n.iterations) { all.s.hats = s.hat[index.of.last.support.point.before.each.extraction] n.presumed.gt.t = unlist( lapply ( t[1:n.atoms], function(T) { which.of.n.atoms = (1:n.atoms)[ t == T] S = s.hat[which.of.n.atoms] # extracted.alive.before.T extracted.alive.before.T = (inspection.time==left & inspection.time < T) indices.of.these = (1:N)[extracted.alive.before.T] S.for.each.of.these = all.s.hats[indices.of.these] prob = S / S.for.each.of.these n.before.expect.gt.t = sum(prob) # extracted.dead.after.T #print("AFTER") extracted.dead.after.T = (inspection.time==right & inspection.time > T) indices.of.these = (1:N)[extracted.dead.after.T] S.for.each.of.these = all.s.hats[indices.of.these] prob = (S - S.for.each.of.these ) / (1 - S.for.each.of.these) n.after.that.were.gt.t = sum(prob) n.fitted = n.before.expect.gt.t+n.after.that.were.gt.t #return(n.fitted) } ) ) new.s.hat = (n.known.gt.t + n.presumed.gt.t )/N probs = new.s.hat[index.of.last.support.point.before.each.extraction] ^right.cens * (1-new.s.hat[index.of.last.support.point.before.each.extraction])^left.cens logL = sum(log(probs)) LL[iteration] = logL print( max( abs( new.s.hat - s.hat ) ) ) s.hat = new.s.hat if (iteration < 50) S.hat.matrix[iteration+1,] = s.hat } print(round(s.hat,3)) fitted.probs = new.s.hat[1:(length(new.s.hat)-1) ] - new.s.hat[2:length(new.s.hat) ] plot(c(0,max.n+1),c(0,1),col="white", xlim=c(0,max.T), ylim=c(0,1.05)) # primes # plot(c(0,120),c(0,1),col="white") # interesting portion of avalanche curve text(0,1.01,"S-hat",adj=c(0,0), col="red") lines(t,s.hat,type="s", col="red" ) # plot(1:n.iterations,LL,type="l") cbind(time,left,right) #### show.iterations = FALSE if(show.iterations) { par(mfrow=c(1,1),mar=c(2,2,0.1,0.1),oma=c(2,2,0.1,0.1)) plot(c(0,max.n+1),c(0,1),col="white", xlim=c(0,max.T), ylim=c(0,1.05)) text(0,1.01,"S.hat, iteration by iteration",adj=c(0,0),cex=0.8) for (i in 1:50) { lines(c(0,t),c(1,S.hat.matrix[i,]),type="s", col= ( rainbow(50) )[i] ) } } ###### # #library(Icens) #data= matrix(c(left,right),nrow=N) ; data #EM=EMICM(data, EMstep=TRUE, ICMstep=TRUE, keepiter=FALSE, tol=1e-07,maxiter=1000) #sum(EM$pf)