# updated sept 25, 2012 # Galton data on speed of homimg pigeons f=scan() # bin frequencies 22 43 164 284 598 645 683 396 132 120 120 # f; n=sum(f) ; n = sum(f) ; n; # check lower = c(0,seq(500,1400,100)) ; lower # bin boundaries upper = c(seq(500,1400,100),3000) ; upper # bin boundaries midpoints = c(475, seq(550,1350,100), 1500) ave.speed = sum(f*midpoints) / n; ave.speed d.f. = n-1 var.speed = sum(f*(midpoints-ave.speed)^2)/d.f. ; var.speed var.speed = var.speed - 100^2/12 ; var.speed ; sd = sqrt(var.speed) ; sd se.m = sd/sqrt(n); round(se.m,1) # see notes.. # sd^2 ~ (1/d.f.) * chi.sq.d.f. * sigma^2 ; # var(sd^2) = (1/d.f.)^2 * 2*d.f. * sigma^4 ; # var(sd) = var( [sd^2]^(1/2) ) * [ (d sqrt x / d x ) ]^2; # (d sqrt / d x ) = (1/2) x ^(-1/2) ; (d sqrt / d x )^2 = (1/4) x^(-1) # var(sd) = (1/d.f.)^2 * 2*d.f. * sigma^4 * (1/4) * (sigma^2)^(-1) # = (1/[2d.f.]) * sigma^2 # SE(sd) = sigma/sqrt(2.d.f.) ; CV(sd) = 100 / sqrt( 2 d.f.) se.sd = sd / sqrt(2*d.f.) ; se.sd # limits on sigma sqrt( var.speed * (1/d.f.) * qchisq(c(0.025,0.975),d.f.) ) ###### ML estimates of mu, sigma # working on log2(sigma) scale.. log.lik = function(mu,log2.sigma) sum(f*log( pnorm(upper,mu,2^log2.sigma) - pnorm(lower,mu,2^log2.sigma) )) log.lik(1000,7) # this wont work in outer function (since lower upper and f are themselves vectors), # so 'force' the issue ... log.lik.vector = Vectorize( log.lik, c("mu", "log2.sigma") ) mu = seq(900,1100,1) ; log2.sigma= seq(6,9,0.1) mu = seq(975,989,0.1) ; log2.sigma= seq(7.60,7.685,0.001) log.lik.values = outer(mu, log2.sigma, log.lik.vector) max.log.lik.values = max(log.lik.values) i.max = apply(log.lik.values==max.log.lik.values,1,sum) j.max = apply(log.lik.values==max.log.lik.values,2,sum) mle = c(mu[i.max],round(2^log2.sigma[j.max],1)) ; mle rel.log.lik.values = log.lik.values - max.log.lik.values contour( mu, log2.sigma, rel.log.lik.values, nlevels=40, xlab="mu", ylab="log2.sigma", labcex=1.1) # heatmap(rel.log.lik.values, Rowv = NA, Colv = NA, scale="column", # main = "heatmap(*, NA, NA) ~= image(t(x))") ######## ML estimates of mu, sigma # working on log2(sigma) scale.. # using optim function to maximize # set up logLik with single 2-d vector argument logLik = function(x) { mu = x[1]; log2.sigma = x[2] return( sum(f*log( pnorm(upper,mu,2^log2.sigma) - pnorm(lower,mu,2^log2.sigma) )) ) } logLik(c(1000,10)) ml.fit = optim(c(1000,10),logLik,hessian=T,method="BFGS",control=list(fnscale=-1,parscale=c(0.1,0.1))); ml.fit ##### min chi.sq chi.sq = function(mu,log2.sigma) { fitted.f = n * (pnorm(upper,mu,2^log2.sigma) - pnorm(lower,mu,2^log2.sigma)) return( sum((f-fitted.f)^2/fitted.f) ) } chi.sq(1000,7) chi.sq.vector = Vectorize( chi.sq, c("mu", "log2.sigma") ) mu = seq(900,1100,1) ; log2.sigma= seq(6,9,0.1) mu = seq(978,995,0.1) ; log2.sigma= seq(7.62,7.72,0.0005) chi.sq.values = outer(mu, log2.sigma, chi.sq.vector) min.chi.sq.values = min(chi.sq.values) i.max = apply(chi.sq.values==min.chi.sq.values,1,sum) j.max = apply(chi.sq.values==min.chi.sq.values,2,sum) min.ch.sq.e = c(mu[i.max],round(2^log2.sigma[j.max],1)) ; min.ch.sq.e contour( mu, log2.sigma, chi.sq.values, nlevels=40, xlab="mu", ylab="log2.sigma", labcex=1.0)