# Tabulate the birth months of Canadian-born players listed in NHLnameB.csv.
#
# Steps:
#   1. read the roster file;
#   2. keep the rows whose Birthplace contains a Canadian province/territory
#      code;
#   3. derive a numeric birth month from the first two characters of
#      Birthdate;
#   4. print the frequency table of birth months.

# NOTE(review): absolute, machine-specific path. Kept because code outside
# this section may rely on the working directory being set here.
setwd("/Users/jameshanley/Dropbox/Courses/bios601/CaseControlStudies/BirthMonthSports")

# as.is = TRUE is recycled across all columns, so the column count does not
# have to be hard-coded.
ds <- read.csv("NHLnameB.csv", header = TRUE, as.is = TRUE)

# Quick look at the full roster and its size.
head(ds)
tail(ds)
length(ds$Birthplace)

# NOTE(review): `cdn` is not used anywhere in this section, and it differs
# from the codes actually matched below ("QUE" here vs "PQ" below; several
# codes are absent). Kept unchanged in case later code refers to it.
cdn <- c("BC", "SASK", "MAN", "ONT", "QUE", "NB")

# Province/territory codes searched for in Birthplace.
# NOTE(review): matching is a case-sensitive substring search, so a code can
# also match inside a longer word; confirm against the actual Birthplace
# values.
cdn_codes <- c(
  "BC", "ALTA", "SASK", "MAN", "ONT", "PQ",
  "NB", "NS", "PEI", "NFLD", "NW", "Yuk"
)

# One grep over the alternation of all codes returns each matching row index
# exactly once, in file order. Concatenating one grep() per code would repeat
# any row that matches more than one code and so inflate the month counts.
CDN <- grep(paste(cdn_codes, collapse = "|"), ds$Birthplace)

ds <- ds[CDN, ]

# Quick look at the Canadian-born subset and its size.
head(ds)
tail(ds)
length(ds$Birthplace)

# Presumably Birthdate begins with a two-digit month -- TODO confirm the
# date format in the CSV; any other prefix becomes NA here.
ds$month <- as.numeric(substr(ds$Birthdate, 1, 2))

table(ds$month)