# Current Status Data -- see article

# 2011.11.11, and 2011.11.17 -- thanks to David -- error corrected

# probs =    new.s.hat[index.of.last.support.point.before.each.extraction] ^right.cens * 
#        (1-new.s.hat[index.of.last.support.point.before.each.extraction])^left.cens

# if want each prob to be one or the other, depending on censoring
# must use '*' rather than the '+' I had...

# probs =    new.s.hat[index.of.last.support.point.before.each.extraction] ^right.cens + 
#         (1-new.s.hat[index.of.last.support.point.before.each.extraction])^left.cens

# or , instead of powers , I could have added 2 products...
# 
# PROBS =    new.s.hat[index.of.last.support.point.before.each.extraction] * right.cens +
#         (1-new.s.hat[index.of.last.support.point.before.each.extraction])* left.cens
  

setwd("/Users/jameshanley/Documents/Courses/bios601/Likelihood")

source("GetCurrentStatusData.R") # for primes

topic="primes"
#topic="avalanche"

if(topic=="primes") max.n=29
if(topic=="avalanche") max.n=1999   

if(topic=="primes")    { ds=GetCurrentStatusData(max.n) ; every=1; max.T=max.n } 
if(topic=="avalanche") { ds = read.table("avalancheDataForR.txt"); every=10; max.T=90}   

head(ds) ; tail(ds)

##### change the 10 ds$left and 10 ds$right values to those in the assignmant..

##### ...... 

support = seq(0.5,max.n+0.5,1) ; L=length(support) ; L  

N = length(ds$left); N
m=matrix(rep(NA,L*N),nrow=N)
for (i in 1:length(support))  m[,i] = 1*(support[i] > ds$left & support[i] < ds$right) ; 
m[1:10,1:10] 

support.point.is.in.this.many.intervals = apply(m,2,"sum") ; 

plot(support,support.point.is.in.this.many.intervals,type="l",
 xlab="support point",  xlim=c(0,max.T),
 ylim=c(0,max(support.point.is.in.this.many.intervals)))

# eliminate redundant support points  (ie identical columns)
# (further elimination possible, but not done)

fromLeft=FALSE

if(fromLeft) { 
	M=m[,1] ; t=support[1];
   for(col in 2:L) if( sum( m[,col]==m[,col-1] ) < N) {t=c(t,support[col]) ; M = cbind(M,m[,col]) }
   n.atoms = length(t) 
}
if(!fromLeft) {   
   M=m[,L] ; t=support[L]
   for(col in (L-1):1) if( sum( m[,col]==m[,col+1] ) < N) {
   	  t=c(t,support[col]) ; M = cbind(M,m[,col]) }
   n.atoms = length(t)
   t = t[n.atoms:1]
   M= M[,n.atoms:1]   
}
n.atoms ; t

SUPPORT.point.is.in.this.many.intervals = apply(M,2,"sum") ; 
SUPPORT.point.is.in.this.many.intervals

par(mfrow=c(2,1),mar=c(2,2,0.1,0.1),oma=c(0.1,0.1,0.1,0.1))

plot(c(0,max.n+1),c(-N/10,1.15*(N+1)),col="white",xlim=c(0,max.T))

for(i in seq(1,N,every)) segments(ds$left[i], i, ds$right[i], i, col="grey50", lwd=0.5)

for(i in 1:n.atoms) {
	points(c(t[i]), c(-0.5), pch=19, cex=0.25 )
	text(t[i], -N/20, toString(SUPPORT.point.is.in.this.many.intervals[i]), 
	  adj=c(1,0),cex=0.6, srt = 90 )
	for(j in seq(1,N,every) ) if(M[j,i]==1) points(c(t[i]), c(j), pch=19, cex=0.1 )
}

#  see where could reduce further

drop=rep(FALSE,n.atoms) 
for (i in 1:(n.atoms-1)  ) 
	if( SUPPORT.point.is.in.this.many.intervals[i+1] > 
	    SUPPORT.point.is.in.this.many.intervals[i]      ) drop[i]=TRUE
drop
for (i in n.atoms:2  ) 
	if( SUPPORT.point.is.in.this.many.intervals[i-1] > 
	    SUPPORT.point.is.in.this.many.intervals[i]      ) drop[i]=TRUE
drop
points(t[!drop], rep(-0.5,sum(!drop)), pch=17,col="red",cex=0.5)
points(c(0), c(1.05*N), pch=17,col="red",cex=0.5)
text(1,1.05*N,"minimal set ?", adj=c(0,0.5),col="red",cex=0.7) 
points(c(0), c(1.12*N), pch=19,cex=0.5)
text(1,1.12*N,"support points", adj=c(0,0.5),cex=0.7) 

# self-consistency


inspection.time = ds$left*(ds$left>0) + ds$right*(ds$left==0)
left = ds$left
right  = ds$right

# start with ...

left.cens  = 1*(inspection.time==right)
right.cens = 1*(inspection.time==left)

s.hat = 1-cumsum(SUPPORT.point.is.in.this.many.intervals)/
             sum(SUPPORT.point.is.in.this.many.intervals)     

# plot(c(0,t),c(1,s.hat),type="s", ylim=c(0,1))

index.of.last.support.point.before.each.extraction = 
   unlist(lapply(inspection.time,  function(T) sum(t < T) ))
	   
n.known.gt.t=unlist(lapply(t, function(T) sum(inspection.time== left & inspection.time > T) ))

n.iterations=10

LL= rep(NA,n.iterations)

S.hat.matrix=matrix(rep(NA,50*n.atoms),nrow=50)   # first 50 iterations...
S.hat.matrix[1,]=s.hat

for(iteration in 1:n.iterations) {
	
  all.s.hats = s.hat[index.of.last.support.point.before.each.extraction]

  n.presumed.gt.t = unlist(
  lapply ( t[1:n.atoms],  function(T) {
	           which.of.n.atoms = (1:n.atoms)[ t == T]
	           S = s.hat[which.of.n.atoms]
	           
	           # extracted.alive.before.T
	           
	           extracted.alive.before.T = (inspection.time==left & inspection.time < T)
	           indices.of.these = (1:N)[extracted.alive.before.T]
	           S.for.each.of.these  = all.s.hats[indices.of.these]
	           prob = S / S.for.each.of.these   
	           n.before.expect.gt.t = sum(prob) 
	            
	           # extracted.dead.after.T
	           
	           #print("AFTER")
	           
	           extracted.dead.after.T = (inspection.time==right & inspection.time > T)
	           indices.of.these = (1:N)[extracted.dead.after.T]
	           S.for.each.of.these  = all.s.hats[indices.of.these]
	           prob = (S - S.for.each.of.these ) / (1 -  S.for.each.of.these)   
	           n.after.that.were.gt.t = sum(prob) 
	           
	           n.fitted = n.before.expect.gt.t+n.after.that.were.gt.t
	           #return(n.fitted)
	           }
      )
   )
	
new.s.hat = (n.known.gt.t + n.presumed.gt.t )/N

probs =    new.s.hat[index.of.last.support.point.before.each.extraction] ^right.cens * 
        (1-new.s.hat[index.of.last.support.point.before.each.extraction])^left.cens
        
logL = sum(log(probs))

LL[iteration] = logL

print( max( abs( new.s.hat - s.hat ) ) )

s.hat = new.s.hat

if (iteration < 50) S.hat.matrix[iteration+1,] = s.hat

}

print(round(s.hat,3))
fitted.probs = new.s.hat[1:(length(new.s.hat)-1) ] - new.s.hat[2:length(new.s.hat) ]

plot(c(0,max.n+1),c(0,1),col="white", xlim=c(0,max.T), ylim=c(0,1.05))  # primes

#  plot(c(0,120),c(0,1),col="white")   # interesting portion of avalanche curve

text(0,1.01,"S-hat",adj=c(0,0), col="red")
lines(t,s.hat,type="s", col="red" )


# plot(1:n.iterations,LL,type="l")

cbind(time,left,right)

####  

show.iterations = FALSE

if(show.iterations) {
	par(mfrow=c(1,1),mar=c(2,2,0.1,0.1),oma=c(2,2,0.1,0.1))
	plot(c(0,max.n+1),c(0,1),col="white", xlim=c(0,max.T), ylim=c(0,1.05))
	text(0,1.01,"S.hat, iteration by iteration",adj=c(0,0),cex=0.8) 
	for (i in 1:50) {
	 lines(c(0,t),c(1,S.hat.matrix[i,]),type="s", col= ( rainbow(50) )[i] )
	 
	    }
	}

###### # 

#library(Icens)

#data= matrix(c(left,right),nrow=N) ; data 
#EM=EMICM(data, EMstep=TRUE, ICMstep=TRUE, keepiter=FALSE, tol=1e-07,maxiter=1000) 
#sum(EM$pf)