# Spiked drinks

# Folder holding spikedDrinksData.txt on the original author's machine.
# The path is machine-specific, so only switch to it when it exists; on any
# other machine the script keeps the current working directory and the data
# file is looked up there instead of the script aborting at this line.
data_dir <- "/Users/jameshanley/Desktop"
if (dir.exists(data_dir)) {
  setwd(data_dir)
}

#Number   Number in study
#Sex      M = 1, F = 0 (stored in column "Male")
#drinker  regular drinker, >= 14 units/wk. Y = 1, N = 0
#Age      years
#smoker   regular smoker Y = 1, N = 0 (stored in column "Smoker")
#pure     was drink "pure"(orange juice/lager)? Y = 1, N = 0
#dr       which drink? Lager = 1, orange juice = 0
#concent  a coded concentration: 0,1,2,3,4, according to the
#         order of concentration in the paper.
#         0 = no added alcohol to either drink,
#         but 4 is a different
#         concentration for lager than for orange juice.
#         Within drink, the numbers reflect an ordinal scale.
#adj      actual relative concentration, specific to each drink
#         (e.g. percentages for OJ have been re-scaled to 0,1,2,4,6,
#         and for lager to 0,4,7,10,14, to reflect the relative
#         magnitudes of the added alcohol). But the scales are
#         specific to each drink - you would have to multiply
#         the OJ scale by some factor to express it on the same scale
#         as the lager. Or just convert all back to % of drink volume
#         (as explained in the paper).

#event    which event? Medsoc ball = 0, Public Health Awayday = 1

#spiked   the complement of the variable "pure"
#         (to get the probability that the drink has been spiked)

#filter   a filter for when I looked only at the OJ data

#ojadj    the index used for the paper,
#         added alcohol expressed as % weight by volume
#         (whatever that means).

# Variable names for the data file, in column order. They follow the
# codebook in the comments above, except that the codebook entries "Sex"
# and "smoker" are stored here as "Male" and "Smoker".
col_names <- c(
  "Number", "Male", "drinker", "Age", "Smoker", "pure",
  "dr", "concent", "adj", "event", "spiked", "filter", "ojadj"
)

# Read the raw data with read.table() defaults (whitespace-separated
# fields, no header row), so the names have to be attached afterwards.
ds <- read.table("spikedDrinksData.txt")

# Fail loudly when the file does not have exactly one column per name;
# without this check, extra columns would silently end up named NA.
stopifnot(ncol(ds) == length(col_names))
names(ds) <- col_names

# Quick checks of the imported data: structure, per-column summaries,
# and the first and last few rows.
str(ds)
summary(ds)
head(ds)
tail(ds)