# 2012.11.07 # 1970 Batting Averages for 18 Major League Players and Transformed Values Xi, O0 # Yi = batting pi = batting At bats average for average for for i # Player first 45 remainder remainder Xi Hi at bats of #season of season (1) (2) (3) (4) (5) # from Efron and Morris JASA 1975 x=scan(,what=list("numeric","","numeric","numeric","numeric","numeric","numeric")) 1 "Clemente" .400 .346 367 -1.35 -2.10 2 "Robinson" .378 .298 426 -1.66 -2.79 3 "Howard" .356 .276 521 -1.97 -3.11 4 "Johnstone" .333 .222 275 -2.28 -3.96 5 "Berry" .311 .273 418 -2.60 -3.17 6 "Spencer" .311 .270 466 -2.60 -3.20 7 "Kessinger" .289 .263 586 -2.92 -3.32 8 "Alvarado" .267 .210 138 -3.26 -4.15 9 "Santo" .244 .269 510 -3.60 -3.23 10 "Swoboda" .244 .230 200 -3.60 -3.83 11 "Unser" .222 .264 277 -3.95 -3.30 12 "Williams" .222 .256 270 -3.95 -3.43 13 "Scott" .222 .303 435 -3.95 -2.71 14 "Petrocelli" .222 .264 538 -3.95 -3.30 15 "Rodriguez" .222 .226 186 -3.95 -3.89 16 "Campaneris" .200 .285 558 -4.32 -2.98 17 "Munson" .178 .316 408 -4.70 -2.53 18 "Alvis" .156 .200 70 -5.10 -4.32 # d=cbind( as.numeric(x[[1]]),as.numeric(x[[3]]),as.numeric(x[[4]]) );d colnames(d)=c("id","p.45","p.year") ds=data.frame(d) ; ds$hits.45 = round(ds$p.45*45,0) ; hits.45 = ds$hits.45 ; hits.45 ; paste(hits.45,collapse=",") p.45 = ds$p.45 P.45=mean(ds$p.45) ; P.45 var(ds$hits.45) 45*P.45*(1-P.45) n.players=length(ds$hits.45) # method of moments empirical.variance.of.18.phats = var(ds$p.45) empirical.variance.of.18.phats # 0.004853 variance.of.18.phats.if.homogeneous.p = P.45*(1-P.45)/45 variance.of.18.phats.if.homogeneous.p # 0.004332 diff = empirical.variance.of.18.phats - variance.of.18.phats.if.homogeneous.p diff # 0.000521 ### overall.variance = Var[E[each.p.hat]] + E[Var[p.hat]] ### 0.004853 = 0.000521 + 0.004332 (approx) ## Var[E[each.p.hat]] = 0.000521 sigma( true p ) = sqrt(0.000521) = 0.023 # .275 +/- 2 sigma == .275 +/- 0.046 #width of approx 9 percentage points #ie from batting 275-46 [239] to 275+46 = [321] #if the true p 's followed a Gaussian distribution ' ############################# R JAGS ####### normal instead of beta for individual players' longterm averages ####### normal instead of binomial for short-term within-player average NormalNormalModel=readLines(,15) model { for (i in 1:n.players) { p.longrun[i] ~ dnorm(mu,precision) short.term.precision[i] <- 45 / ( p.longrun[i] * (1-p.longrun[i] ) ) p.45[i] ~ dnorm(p.longrun[i], short.term.precision[i] ) } mu ~ dunif(0.10,0.90) precision ~ dgamma(0.001,0.001) sigma <- sqrt(1/precision) } writeLines(NormalNormalModel,'NormalNormalModel.txt') INITS= list( mu = .3, precision = 100, p.longrun = rbeta(18,28,72) ) # for Winbugs ######### #data list(n.players=18, p.45=c(18,17,16,15,14,14,13,12,11,11,10,10,10,10,10,9,8,7)) list( mu = .3, precision = 100, p.longrun = c(.1,.1,.1, .1,.1,.1, .1,.1,.1, .1,.1,.1, .1,.1,.1, .1,.1,.1) ) ####################### library('rjags') jags <- jags.model("NormalNormalModel.txt", data = list(p.45=p.45,n.players=n.players), inits=INITS, n.chains = 1, n.adapt = 4000) update(jags, 10000) results=jags.samples(jags, c('mu','sigma'), 10000) str(results) summary(results$mu[1,,1]) summary(results$sigma[1,,1]) results=jags.samples(jags, c('p.longrun' ), 10000) hist( results$p.longrun[1 ,,1]) hist(results$p.longrun[10,,1]) shrunken.estimates = apply(results$p.longrun,1,median); shrunken.estimates plot(c(0.15,0.4),c(0,1),col="white") for (i in 1:18) { segments( shrunken.estimates[i],0,ds$p.45[i],1, col=i ) } ######### ##### toxoplasmosis data -- see Efron articles x=scan(,what=list("numeric","","numeric")) 1 .293 .304 2 .214 .039 3 .185 .047 4 .152 .115 5 .139 .081 6 .128 .061 7 .113 .061 8 .098 .087 9 .093 .049 10 .079 .041 11 .063 .071 12 .052 .048 13 .035 .056 14 .027 .040 15 .024 .049 16 .024 .039 17 .014 .043 18 .004 .085 19 -.016 .128 20 -.028 .091 21 -.034 .073 22 -.040 .049 23 -.055 .058 24 -.083 .070 25 -.098 .068 26 -.100 .049 27 -.112 .059 28 -.138 .063 29 -.156 .077 30 -.169 .073 31 -.241 .106 32 -.294 .179 33 -.296 .064 34 -.324 .152 35 -.397 .158 36 -.885 .216 #