# autism & MMR-vaccination data, reconstructed from the 2002 Danish cohort study

# see material (NEJM article, Editorial, and 2 JH diagrams) in JH's website

# 524-207G Introduction to Epidemiology (med2) course, Fall 2002, Nov 11 lecture

# if you wish to work with SAS, see the data and SAS code in the item

#  'counts / person-times measured from JH's Autism diagram, together with SAS program'

# in course 634, under the 'stratified data: MH etc' tab

# Raw data stream: 72 records of 5 values each (360 values in all).
# Values within a record, in order:
#   1. vaccination indicator (1 or 0; records alternate 1, 0, 1, 0, ...)
#   2. year of birth (two-digit)
#   3. age (mid-point of the age interval)
#   4. child-years of follow-up
#   5. number of cases
# The data are passed to scan() as text, so the script also runs under
# source() / Rscript; a bare scan() would only pick up the lines that follow
# it when the script is pasted into an interactive console.
x <- scan(text = "
1 91 1.5 31027   4
0 91 1.5 35973   4
1 91 2.5 61809   9
0 91 2.5  5191   0
1 91 3.5 61975   8
0 91 3.5  5025   2
1 91 4.5 61975  12
0 91 4.5  5025   2
1 91 5.5 61975  17
0 91 5.5  5025   0
1 91 6.5 61975   7
0 91 6.5  5025   1
1 91 7.5 61975   7
0 91 7.5  5025   1
1 91 8.25 30987  2
0 91 8.25  2512  0
1 92 1.5 30565   0
0 92 1.5 36435   0
1 92 2.5 60889   7
0 92 2.5  6111   1
1 92 3.5 61052  18
0 92 3.5  5948   0
1 92 4.5 61052  16
0 92 4.5  5948   1
1 92 5.5 61052  12
0 92 5.5  5948   1
1 92 6.5 61052   7
0 92 6.5  5948   0
1 92 7.25 30526  1
0 92 7.25  2974  0
1 93 1.5 30110   4
0 93 1.5 36890   0
1 93 2.5 59983  13
0 93 2.5  7017   0
1 93 3.5 60143  15
0 93 3.5  6857   2
1 93 4.5 60143  15
0 93 4.5  6857   3
1 93 5.5 60143  11
0 93 5.5  6857   4
1 93 6.25 30072  5
0 93 6.25  3428  0
1 94 1.5 29662   4
0 94 1.5 37338   5
1 94 2.5 59090   3
0 94 2.5  7910   1
1 94 3.5 59248  10
0 94 3.5  7752   1
1 94 4.5 59248  10
0 94 4.5  7752   3
1 94 5.25 29624  6
0 94 5.25  3876  0
1 95 1.5 29220   0
0 95 1.5 37780   3
1 95 2.5 58210   8
0 95 2.5  8790   2
1 95 3.5 58366  10
0 95 3.5  8634   2
1 95 4.25 29183 11
0 95 4.25  4317  0
1 96 1.5 28785   1
0 96 1.5 38215   1
1 96 2.5 57343   3
0 96 2.5  9657   0
1 96 3.25 28748  3
0 96 3.25  4752  1
1 97 1.5 28357   4
0 97 1.5 38643   4
1 97 2.25 28174  2
0 97 2.25  5326  1
1 98 1.25  3320  1
0 98 1.25 30180  3
")

#

# Check that the data stream was read correctly.

# Echo every value ...
x

# ... or just the beginning and the end.
head(x)
tail(x)

# Reshape for the summary rate ratios and rate differences.

# Laying the values out 10 per row puts each indicator-1 record (columns 1-5)
# next to the indicator-0 record that follows it (columns 6-10); 360 values
# give 36 rows, one per year-age stratum.
# Guard first: matrix() would otherwise recycle values (with only a warning)
# if the stream were incomplete.
stopifnot(length(x) %% 10 == 0)

DS <- matrix(x, ncol = 10, byrow = TRUE)
head(DS)
tail(DS)

# Columns 1 and 6-8 are about to be dropped as redundant. Confirm that they
# really are: the indicators must be 1 then 0, and both halves of a row must
# refer to the same year of birth and the same age.
stopifnot(
  all(DS[, 1] == 1),
  all(DS[, 6] == 0),
  identical(DS[, 2], DS[, 7]),
  identical(DS[, 3], DS[, 8])
)

# Drop the redundant columns: keep year, age, and the child-years / cases
# of each of the two groups.
DS <- DS[, c(2:5, 9:10)]
head(DS)
tail(DS)

# Name the columns ("v." = indicator 1, "not.v." = indicator 0).
colnames(DS) <- c("yr.born", "age.mid", "v.ch.yrs", "v.cases",
                  "not.v.ch.yrs", "not.v.cases")

head(DS)
tail(DS)

# Crude (unstratified) analysis: total child-years and cases in each group,
# obtained as the column sums of the four count / person-time columns.
crude.data <- colSums(DS[, 3:6])
crude.data

# Crude rates (cases per child-year) in the "v." and "not.v." groups,
# and their ratio.
crude.rate.v     <- crude.data["v.cases"]     / crude.data["v.ch.yrs"]
crude.rate.not.v <- crude.data["not.v.cases"] / crude.data["not.v.ch.yrs"]
crude.rate.ratio <- crude.rate.v / crude.rate.not.v

# Show both rates together with the rate ratio (rounded to 2 decimals).
c(crude.rate.v, crude.rate.not.v, round(crude.rate.ratio, 2))

# Match (stratify) on age.

# The age strata can be formed at two resolutions; this switch picks one:
#   TRUE  -> coarser (wider) strata: age mid-points floored to whole years
#   FALSE -> finer (narrower) strata: each distinct age mid-point is a stratum
# (Previously both tables were computed in turn and the finer one was
# immediately overwritten; TRUE reproduces the table that was actually used.)
coarse.age.strata <- TRUE

age.specific <- aggregate(
  DS[, 3:6],
  by = list(age = if (coarse.age.strata) floor(DS[, 2]) else DS[, 2]),
  FUN = sum
)

age.specific  # (age-specific) column sums

# Mantel-Haenszel (MH) calculations: summary over the age strata.

# Numerator terms, one per stratum:
#   "v." cases * "not.v." child-years / total child-years in the stratum
mh.num <- with(
  age.specific,
  v.cases * not.v.ch.yrs / (v.ch.yrs + not.v.ch.yrs)
)
mh.num

# Denominator terms, one per stratum:
#   "not.v." cases * "v." child-years / total child-years in the stratum
mh.den <- with(
  age.specific,
  not.v.cases * v.ch.yrs / (v.ch.yrs + not.v.ch.yrs)
)
mh.den

# Summary rate ratio = sum of numerator terms / sum of denominator terms.
summary.rate.ratio <- sum(mh.num) / sum(mh.den)
round(summary.rate.ratio, 2)

# Match (stratify) on both age and year of birth.

# Sum the child-years and cases within every (age, year) combination.
age.year.specific <- aggregate(
  DS[, 3:6],
  by = list(age = DS[, "age.mid"], year = DS[, "yr.born"]),
  FUN = sum
)

head(age.year.specific)  # (age- and year- specific) column sums
tail(age.year.specific)  # ... same as original 36 rows

# Mantel-Haenszel (MH) calculations: summary over the age-and-year strata.

# Per-stratum numerator terms:
#   "v." cases * "not.v." child-years / total child-years
mh.num <- with(
  age.year.specific,
  v.cases * not.v.ch.yrs / (v.ch.yrs + not.v.ch.yrs)
)
mh.num

# Per-stratum denominator terms:
#   "not.v." cases * "v." child-years / total child-years
mh.den <- with(
  age.year.specific,
  not.v.cases * v.ch.yrs / (v.ch.yrs + not.v.ch.yrs)
)
mh.den

# Summary rate ratio, adjusted for age and year of birth.
summary.rate.ratio <- sum(mh.num) / sum(mh.den)
round(summary.rate.ratio, 2)