# jh 2013 09 05
#
# Sampling-distribution demo using airline on-time data:
#   1. load the flight file and look at the full set of arrival delays,
#   2. draw one small sample and compute a normal-theory interval for the mean,
#   3. bootstrap the mean of that small sample,
#   4. simulate the sampling distribution of the mean for n = 50, 100, ..., 1000.

# NOTE(review): machine-specific absolute path; kept so the relative file name
# below still resolves. Adjust (or remove) when running elsewhere.
setwd("/Users/jameshanley/Dropbox/Courses/bios601/Mean-Quantile")

# RITA
# http://www.transtats.bts.gov/DL_SelectFields.asp?Table_ID=236&DB_Short_Name=On-Time

ds <- read.csv("1004324411_T_ONTIME.csv")  # 2013..... 1/2 million flights

# Keep only the rows with a recorded value in ARR_DELAY_NEW.
ds <- ds[!is.na(ds$ARR_DELAY_NEW), ]

N <- length(ds$ARR_DELAY_NEW)
N

# Looking at the whole "population" -- a luxury we do not usually have...
summary(ds$ARR_DELAY_NEW)
hist(ds$ARR_DELAY_NEW, breaks = 100)
length(unique(ds$ARR_DELAY_NEW))

n <- 30  # more realistic in scientific work....

delays.n <- sample(ds$ARR_DELAY_NEW, n)

# Lower limit, point estimate and upper limit: mean -/+ 1.96 standard errors.
# (The original referred to an undefined object `delays.200`; the sample drawn
# just above is `delays.n`.)
interval <- mean(delays.n) + c(-1.96, 0, 1.96) * sd(delays.n) / sqrt(n)
interval

# bootstrap CI: means of resamples (with replacement) from the small sample
n.samples <- 1000
bs <- vapply(
  seq_len(n.samples),
  function(s) mean(sample(delays.n, n, replace = TRUE)),
  numeric(1)
)
hist(bs)

# Simulated sampling distribution of the mean for increasing sample sizes:
# each panel shows the relative frequency of sample means rounded to 1 decimal.
n.samples <- 1000
for (n in seq(50, 1000, 50)) {
  ybar <- vapply(
    seq_len(n.samples),
    function(s) mean(sample(ds$ARR_DELAY_NEW, n)),
    numeric(1)
  )
  f <- table(round(ybar, 1)) / n.samples
  x <- as.numeric(names(f))
  # `ticks` is not a graphical parameter (it only produced warnings), so the
  # default x-axis is suppressed and drawn explicitly at 5, 10, 15, 20.
  plot(x, f,
    xlim = c(2, 20), ylim = c(0, 0.05),
    type = "h", main = toString(n), xaxt = "n"
  )
  axis(side = 1, at = seq(5, 20, 5))
}