# 2012.11.07

# 1970 Batting Averages for 18 Major League Players and Transformed Values Xi, O0

# Yi = batting pi = batting At bats average for average for for i 
# Player first 45 remainder remainder Xi Hi at bats of #season of season (1) (2) (3) (4) (5)
 
 # from Efron and Morris JASA 1975 
 
x=scan(,what=list("numeric","","numeric","numeric","numeric","numeric","numeric"))
1 "Clemente"    .400 .346 367 -1.35 -2.10 
2 "Robinson"    .378 .298 426 -1.66 -2.79
3 "Howard"      .356 .276 521 -1.97 -3.11 
4 "Johnstone"   .333 .222 275 -2.28 -3.96 
5 "Berry"       .311 .273 418 -2.60 -3.17 
6 "Spencer"     .311 .270 466 -2.60 -3.20 
7 "Kessinger"   .289 .263 586 -2.92 -3.32 
8 "Alvarado"    .267 .210 138 -3.26 -4.15 
9 "Santo"       .244 .269 510 -3.60 -3.23 
10 "Swoboda"    .244 .230 200 -3.60 -3.83 
11 "Unser"      .222 .264 277 -3.95 -3.30 
12 "Williams"   .222 .256 270 -3.95 -3.43 
13 "Scott"      .222 .303 435 -3.95 -2.71 
14 "Petrocelli" .222 .264 538 -3.95 -3.30 
15 "Rodriguez"  .222 .226 186 -3.95 -3.89 
16 "Campaneris" .200 .285 558 -4.32 -2.98 
17 "Munson"     .178 .316 408 -4.70 -2.53 
18 "Alvis"      .156 .200  70 -5.10 -4.32 

#
d=cbind( as.numeric(x[[1]]),as.numeric(x[[3]]),as.numeric(x[[4]]) );d
colnames(d)=c("id","p.45","p.year")

ds=data.frame(d) ; 

ds$hits.45 = round(ds$p.45*45,0) ; 

hits.45 = ds$hits.45 ; hits.45 ; paste(hits.45,collapse=",")

p.45 = ds$p.45

P.45=mean(ds$p.45) ; P.45

var(ds$hits.45)

45*P.45*(1-P.45)

n.players=length(ds$hits.45)


# method of moments

empirical.variance.of.18.phats = var(ds$p.45)

empirical.variance.of.18.phats # 0.004853

variance.of.18.phats.if.homogeneous.p = P.45*(1-P.45)/45

variance.of.18.phats.if.homogeneous.p  # 0.004332

diff = empirical.variance.of.18.phats - variance.of.18.phats.if.homogeneous.p  

diff  # 0.000521

### overall.variance = Var[E[each.p.hat]] + E[Var[p.hat]]

###        0.004853 = 0.000521 + 0.004332 (approx)

##  Var[E[each.p.hat]] = 0.000521

sigma( true p  ) = sqrt(0.000521) = 0.023

 # .275 +/- 2 sigma  ==  .275 +/- 0.046

#width of approx 9 percentage points

#ie from batting 275-46 [239] to 275+46 = [321]

#if the true p 's followed a Gaussian distribution '  


############################# R JAGS


####### normal instead of beta for individual players' longterm averages

####### normal instead of binomial for short-term within-player average


NormalNormalModel=readLines(,15)
model
{
  for (i in 1:n.players) { 
  	 
  	p.longrun[i] ~ dnorm(mu,precision)
  	
  	short.term.precision[i] <- 45 / (  p.longrun[i] * (1-p.longrun[i] )  )
    p.45[i] ~ dnorm(p.longrun[i], short.term.precision[i] )  
  } 
   
  mu ~ dunif(0.10,0.90)
  precision ~ dgamma(0.001,0.001)
  
  sigma <- sqrt(1/precision)
}


writeLines(NormalNormalModel,'NormalNormalModel.txt')



INITS= list( mu = .3, precision = 100, p.longrun = rbeta(18,28,72) )

# for Winbugs #########

#data
list(n.players=18,
 p.45=c(18,17,16,15,14,14,13,12,11,11,10,10,10,10,10,9,8,7))

list( mu = .3, precision = 100, 
p.longrun = c(.1,.1,.1,  .1,.1,.1, .1,.1,.1, .1,.1,.1, .1,.1,.1, .1,.1,.1) )

#######################


library('rjags')

jags <- jags.model("NormalNormalModel.txt",
             data = list(p.45=p.45,n.players=n.players),
             inits=INITS,
             n.chains = 1,
             n.adapt = 4000)
 
update(jags, 10000)
 
results=jags.samples(jags, c('mu','sigma'), 10000)
str(results)

summary(results$mu[1,,1])
summary(results$sigma[1,,1])



results=jags.samples(jags, c('p.longrun' ), 10000)

hist(  results$p.longrun[1 ,,1])
hist(results$p.longrun[10,,1])

shrunken.estimates = apply(results$p.longrun,1,median); shrunken.estimates

plot(c(0.15,0.4),c(0,1),col="white")

for (i in 1:18) {
	segments( shrunken.estimates[i],0,ds$p.45[i],1, col=i )
}

#########

##### toxoplasmosis data   -- see Efron articles

x=scan(,what=list("numeric","","numeric"))
1 .293 .304
2 .214 .039 
3 .185 .047
4 .152 .115
5 .139 .081
6 .128 .061
7 .113 .061
8 .098 .087
9 .093 .049
10 .079 .041
11 .063 .071
12 .052 .048
13 .035 .056
14 .027 .040
15 .024 .049
16 .024 .039
17 .014 .043
18 .004 .085
19 -.016 .128 
20 -.028 .091 
21 -.034 .073 
22 -.040 .049 
23 -.055 .058 
24 -.083 .070 
25 -.098 .068 
26 -.100 .049
27 -.112 .059
28 -.138 .063
29 -.156 .077 
30 -.169 .073 
31 -.241 .106 
32 -.294 .179
33 -.296 .064
34 -.324 .152
35 -.397 .158
36 -.885 .216

#