# 2016.08.15 JH # getwd() # setwd("/Users/jameshanley/Documents/work/Henderson") ######### JH DATA par(mfrow=c(1,1),mar = c(4,4,0.1,0.1)) ds=read.csv("StepCounts20102011.csv", as.is=TRUE) ; # not used, just for info ds$y=as.numeric(substr(ds$Day,1,2)) ds$m=as.numeric(substr(ds$Day,4,5)) ds$d=as.numeric(substr(ds$Day,7,8)) ds$day=as.numeric( ceiling(difftime(as.Date(ds$Day), "09-12-29", units="days")) ) head(ds); length(ds$day); summary(ds$day) ds$week = ceiling(ds$day/7) ds$day.of.week = ds$day - 7*(ds$week-1) ds[1:15,]; tail(ds) length(ds$day.of.week) # remove entries past 2011-12-31 ds = ds[ds$day <= 732,] plot(ds$day,ds$J, col="blue",type="l",xlab="Day",ylim=c(0,20000), ylab="No. of Steps") text(400, 18000, "J. Hanley, 2010 & 2011", col="blue", cex=1.25,adj=c(0,1)) points(ds$day,ds$AM, col="red",type="l") text(400, 17000, "AM. Hanley, 2010 ", col="red", cex=1.25,adj=c(0,1)) mean(ds$J) ; sd(ds$J) # won't work: need to exclude NA via na.rm option mean(ds$J,na.rm=TRUE) # first digit -- for Benford plot steps = ds$J[!is.na(ds$J) & ds$J > 0] summary(steps) ## http://statistic-on-air.blogspot.ca/2011/08/benfords-law-or-first-digit-law.html bben <- function(k){ as.numeric(head(strsplit(as.character(k),'')[[1]],n=1)) } # extract the first digit from all numbers computed first.digit <- sapply(steps, bben) # plot frequency of first digits table(first.digit) # mean(ds$AM) ; sd(ds$AM) cor(ds$J,ds$AM) plot(ds$AM, ds$J) segments(0,0,15000,15000) summary(ds$J) ; round(sd(ds$J),0) ; round(100* sd(ds$J) / mean(ds$J),0) ; ave.per.day.by.week = aggregate(ds$J, by=list(ds$week), FUN="mean") names(ave.per.day.by.week)=c("week", "mean.per.day") ave.per.day.by.week var.daily.by.week = aggregate(ds$J, by=list(ds$week), FUN="var") names(var.daily.by.week)=c("week", "var.that.week") var.daily.by.week weekly=merge(ave.per.day.by.week,var.daily.by.week,by.x="week", by.y="week" ) round(mean(weekly),0) ; var(weekly$mean.per.day) median( round( weekly$var.that.week / 7, 0) ) plot(ave.per.day.by.week$week, ave.per.day.by.week$mean.per.day,ylim=c(0,13000), xlab="Week",ylab="Average no. of steps per day") text(50, 13000, "J. Hanley, 2010 & 2011", cex=1.25,adj=c(0.5,1)) var.w = var(ave.per.day.by.week$mean.per.day[!is.na(ave.per.day.by.week$mean.per.day)]) var.w; sd=sqrt(var.w) text(50, 12000, paste("SD:",toString(100*round(sd/100,0)) ), cex=1.5) var.d = var(ds$d)