/* 20) Wins is 0, which means this person won no Oscars 21) Noms is 0, which means this person was never nominated for an Oscar 22) FirstFilm_Year is 1954, which means the first film was at age 41 23) FirstWin_Year is blank, because this person never won 24) FirstNon_Year is blank, because this person never was nominated 25) in total, amounts to 1670 persons and 16 core attributes 26) of course, number of derived attributes can be much larger 27) notice that this dataset is a bit bigger than what was available in 2000 28) that's because we've updated it to include another year of awards 29) and also updated it to include another year of deaths 30) sample size had been 1649 and now is 1670 31) total deaths had been 772 and now is 789 */ * updated april 17, 2006; options ls=80 ps=45; run; data all ; drop end_rec; infile 'Macintosh HD:Courses:626:Oscars:RedelmeierToHanley.txt' delimiter="," missover; input Identity Male Born_USA $ White $ Name_Ch $ drama $ Birth Final Alive Films FourStar Wins Noms Film1_Yr Win1_Yr Nom1_Yr end_rec ever; female = 1 - male; notalive = 1 - alive; age_f1 = Film1_Yr - Birth; age_nom1 = Nom1_Yr - Birth; age_n1_c = int(age_nom1/10); age_win1 = Win1_Yr - Birth; won = .; if Nom1_Yr ne . then do; won = 0; If Win1_yr ne . then do ; T_to_win = Win1_Yr - Nom1_Yr ; won = 1; end; If Win1_yr eq . then T_to_win = Final - Nom1_Yr ; end; If Win1_yr ne . then do ; T_f1_win = Win1_Yr - Film1_Yr ; z_f1_win = 1; end; If Win1_yr eq . then do ; T_f1_win = Final - Film1_Yr ; z_f1_win = 0; end; won_same = 0; if won = 1 and T_to_win = 0 then won_same = 1 ; age_last = Final - Birth; sur_nom1 = Final - Nom1_Yr; fu_last = Final - Nom1_Yr; post_win = Final - Win1_Yr; nomin8ed = (Nom1_Yr ne .); nevernom = (Nom1_Yr eq .); if birth ne . ; run; proc means sum n data=all; var nomin8ed nevernom noms nom1_yr won alive notalive ; run; * Winners versus other nominated ; data win_nom; set all; if nomin8ed and ( final > . and final >= nom1_yr ) and ( (Win1_yr ne . and Win1_yr >= Nom1_yr) or (Win1_yr = . ) ); run; proc print data=all; where ( nomin8ed and ( (final = . ) or (final < nom1_yr) or (Win1_yr ne . and Win1_yr < Nom1_yr) ) ); run; proc means sum mean n min max data=win_nom; class won; var nomin8ed birth male nevernom noms nom1_yr won alive notalive age_win1; run; run; * Winners versus never nominated ; data win_ctl; set all; w = 0; if won =1 then w=1; if ( (won=1) and (final > .) and (final >= nom1_yr ) and (Win1_yr >= Nom1_yr) ) or ( (nevernom=1) and (final > .) and (final >= Film1_Yr ) ) ; run; proc means sum n min max data=win_ctl; class w; var nomin8ed nevernom noms nom1_yr won alive notalive; run; run; * Examples of immortal times ; title "win vs. nominated.... age at death of nominated who have died "; run; proc univariate data=win_nom; var age_last; where (won=0 and alive=0 ); run; title "win vs. nominated.... age at which 1st won.. all winners "; run; proc univariate data=win_nom; var age_win1; where (won=1); run; %let cutoff = 65; title "win vs. nominated.... men who won late (after cutoff) "; run; proc print data=win_nom n; where (won=1 and male=1 and age_win1 > &cutoff); run; title "win vs. nominated.... women who won late (after cutoff)"; run; proc print data=win_nom n; where (won=1 and female=1 and age_win1 > &cutoff); run; title "richard burton" ; run; proc print data=win_nom; where (won=0 and age_last = 59 and noms =7 ); run; title "george burns , jessica tandy, james coburn" ; run; proc print data=win_nom; where ( (age_win1 >= 79) or (age_win1 = 70 ) ); run; title "win vs. ctls .... ctls who died early ( <= cutoff ) "; run; proc univariate data=win_ctl; var age_last; where (w = 0 and notalive and age_last <= &cutoff ); run; title "win vs. ctls .... winners who won late ( > cutoff ) "; run; proc univariate data=win_ctl; var age_win1; where (w = 1 and age_win1 > &cutoff); run; * listings ... lived >= 65 etc ; %let ctls = 1; %let cutoff = 65; data restrict; drop who; if &ctls ne 1 then set win_nom; if &ctls eq 1 then set win_ctl; who=0; if &ctls eq 1 and ((won=1 or won = .) and (age_last >= &cutoff) ) then who=1; if &ctls ne 1 and ((won=1 or won = 0) and (age_last >= &cutoff) ) then who=1; if who; run; options ls = 125; title "never, died >=65 but < 70)"; run; proc sort data=restrict; by age_last; proc print data=restrict heading=v; where (won=. and age_last < 70 and notalive); run; proc print data=restrict heading=v; where (won=. and age_win1 >= 70); run; options ls = 125; title "never, died >=65 but < 70)"; run; proc sort data=restrict; by age_last; proc print data=restrict heading=v; where (won=0 and age_last < 70 and notalive=1); run; proc print data=restrict heading=v; where (win=1 and age_win1 >= 70); run; * -------- CRUDE (K-M and Cox ) ---------; title "vs nominees " ; run; proc sort data=win_nom; by won; Proc lifetest data = win_nom /* notable */ ; by won; time age_last * notalive(0); run; Proc lifetest data = win_nom notable plots=(s); time age_last * notalive(0); strata won; run; proc phreg data=win_nom; model age_last * notalive(0) = won / risklimits; run; title "vs ctls " ; run; proc sort data=win_Ctl; by w; Proc lifetest data = win_Ctl /* notable */ ; by w; time age_last * notalive(0); run; proc phreg data=win_ctl; model age_last * notalive(0) = w / risklimits; run; Proc lifetest data = win_ctl notable plots=(s); time age_last * notalive(0); test w; run; * ------- M-H ------------- risksets at deaths --------------------- ; options ls=90 ps=65 nocenter; run; %let ctls = 0; * 0 if win vs. nom, 1 if win vs. ctls ; %let width = 10; * rectangles in Lexis diagram ; %let height = 2; data p_year; ************ person years (used also for M-H) ; keep id male year period age age_cat t_cox t_cox_p1 dead winner a b c d; if &ctls eq 1 then set win_ctl; if &ctls ne 1 then set win_nom; id = _n_; if &ctls eq 1 then age_0 = film1_yr; if &ctls ne 1 then age_0 = nom1_yr; do year = age_0 to final; dead = (year = final and alive = 0); winner = (win1_yr ne .) and (win1_yr <= year ); a = winner * dead ; b = winner * (1-dead); c = (1-winner)* dead ; d = (1-winner)* (1-dead); age = year - birth; t_cox = age; t_cox_p1 = t_cox+0.99; period = &width*floor(year/&width); period = period + (period +(&width-1) )/10000; age_cat = &height*floor(age/&height); age_cat = age_cat + (age_cat +(&height-1) )/1000; output; end; RUN; proc means data=p_year sum min mean max; run; * ------------------ ; title " MH num and denom based on risksets "; run; proc sort data=p_year; by male period age_cat ; run; proc means noprint sum data=p_year; by male period age_cat ; var a b c d; output out = freqs sum = a b c d; data strata; set freqs; r1=(a+b); r2=(c+d); c1=(a+c);c2=(b+d); n = r1+r2; e = r1*c1/n; v = r1*r2*c1*c2/(n*n*(n-1)); num = a*d/n; den = b*c/n; info = (c1 > 0 and c2 > 0 and r1 > 0 and r2 > 0 and num ne den ); could = (c1 > 0 and c2 = 0); run; proc means sum data=strata; var num den a e v; output out = mh sum = num den a e v; run; data hr ; set mh; hr = num/den; chi = abs(a - e)/sqrt(v); hr_l = hr**(1+1.96/chi); hr_U = hr**(1-1.96/chi); p_value = 2*(1-probnorm(chi)); proc print /* round*/ ; run; title " cond-nl logistic regression for each riskset (same as M-H) "; run; * cox model with person years, no other covars ; data patterns; input winner ; lines; 0 1 ; run; Proc phreg data=p_year noprint outest = beta covout; model t_cox*dead(0) = winner / ties=discrete risklimits ; strata male period age_cat; baseline out = fitted covariates = patterns survival = s /nomean; run; proc print data=beta; run; data z; retain beta; keep beta se z hr hr_L hr_U p; set beta; if _TYPE_ = "PARMS" then beta=winner; if _TYPE_ = "COV" then do; hr = exp(beta); se=sqrt(winner); z = beta/se; p = 2*(1-probnorm(abs(z))); hr_L = exp(beta - 1.96*se); hr_U= exp(beta + 1.96*se); output; end; proc print data=z; run; Proc phreg data=p_year noprint outest = beta covout; model t_cox*dead(0) = winner / ties=discrete risklimits ; strata male period age_cat; proc print data=beta; run; data z; retain beta; keep beta se z hr hr_L hr_U p; set beta; if _TYPE_ = "PARMS" then beta=winner; if _TYPE_ = "COV" then do; hr = exp(beta); se=sqrt(winner); z = beta/se; p = 2*(1-probnorm(abs(z))); hr_L = exp(beta - 1.96*se); hr_U= exp(beta + 1.96*se); output; end; proc print data=z; run; * counting process; Proc phreg data=p_year noprint outest = beta covout ; model (t_cox,t_cox_p1)*dead(0) = winner / ties=discrete; strata male period age_cat; proc print data=beta; run; data z; retain beta; keep beta se z hr hr_L hr_U p; set beta; if _TYPE_ = "PARMS" then beta=winner; if _TYPE_ = "COV" then do; hr = exp(beta); se=sqrt(winner); z = beta/se; p = 2*(1-probnorm(abs(z))); hr_L = exp(beta - 1.96*se); hr_U= exp(beta + 1.96*se); output; end; proc print data=z; run; /* in order to convert p=0.013 (2-sided) to SE; data calc; z=probit(0.013/2); output; proc print ; run; */ run; *** time dependent models ; * vs. nominated ; %let other = %str( ); * orig. ; title "time dep. Cox model, original ?? all time included THIS ONE " ; run; Proc phreg data = win_nom ; model age_last * notalive(0) = &other already / risklimits; already = 0; if ( age_win1 > 0 ) and (age_win1 <= age_last ) then already = 1; run; title "time dep. Cox model, original ??, time < nom. excluded " ; run; Proc phreg data = win_nom ; model age_last * notalive(0) = &other already / risklimits; already = .; if (age_nom1 > . ) and (age_nom1 <= age_last) then already = 0; if (age_nom1 > . ) and (age_nom1 <= age_last) and (age_win1 > . ) and (age_win1 <= age_last) then already = 1; run; * new ; title "time dep. Cox model, time < nom. excl, adj for sex and birth yr " ; run; %let other = %str(male birth); Proc phreg data = win_nom ; model age_last * alive(1) = &other already / risklimits; already = .; if (age_nom1 > . ) and (age_nom1 <= age_last) then already = 0; if (age_nom1 > . ) and (age_nom1 <= age_last) and (age_win1 > . ) and (age_win1 <= age_last) then already = 1; run; * cumulative glory; title "time dep. Cox model, time < nom. excl, adj for sex and birth yr " ; run; %let other = %str(male birth); Proc phreg data = win_nom ; model age_last * alive(1) = &other G G_sq / risklimits; gloryYrs = .; if (age_nom1 > . ) and (age_nom1 <= age_last) then do; G = 0; G_sq = 0; end; if (age_nom1 > . ) and (age_nom1 <= age_last) and (age_win1 > . ) and (age_win1 <= age_last) then do: G = age_last - age_win1; g_sq = G*G; run; ********* vs. ctls ; * orig; title "time dep. Cox model, ctls, original ?? all time included " ; run; %let other = %str( ); Proc phreg data = win_ctl ; * THIS ONE CLOSEST TO THE ORIG. REPORT; model age_last * notalive(0) = &other already / risklimits; already = 0; if (age_win1 > . ) and (age_win1 <= age_last) then already = 1; run; title "time dep. Cox model, original ??, time < 1st film. excluded " ; run; Proc phreg data = win_nom ; model age_last * notalive(0) = &other already / risklimits; already = .; if (age_f1 > . ) and (age_f1 <= age_last) then already = 0; if (age_f1 > . ) and (age_f1 <= age_last) and (age_win1 > . ) and (age_win1 <= age_last) then already = 1; run; * new; title "time dep. Cox model, ctls, new , time < first film excl " ; run; %let other = %str(male birth); Proc phreg data = win_ctl ; model age_last * alive(1) = &other already / risklimits; already = .; if (age_f1 > . ) and (age_f1 <= age_last) then already = 0; if (age_f1 > . ) and (age_f1 <= age_last) and (age_win1 > . ) and (age_win1 <= age_last) then already = 1; run; *********** --- actor years and counterfactuals ************ ; Title "counterfactual -- what if winning didnt matter ? "; Title2 " logistic regression: unit of analysis is person-year "; run; * check to see that ctls is set correctly **** ; * win vs. nom; proc logistic descending data=p_year; model dead = male year age winner / risklimits; run; * win vs. ctls ; proc logistic descending data=p_year; model dead = male year age winner / risklimits; run; Title "counterfactual -- what if winning didnt matter ? "; Title2 "fitted curve for each winner, with null and fitted beta_winner "; %let l_w_nom = %str(17.9084 + MALE*0.3495 -YEAR*0.0142 + AGE*0.0938); %let l_w_ctl = %str(11.0418 + MALE*0.1853 -YEAR*0.0109 + AGE*0.0979); data fitted; keep male birth win1_yr age_win1 identity age year post_win s_if_not s_if_win a_if_not a_if_win diff z; retain b_w_nom -0.2018 me_w_nom 0.1228 b_w_ctl -0.1575 me_w_ctl 0.1124 b_w; z = -1.96; z = 0.0; z = 1.96; **** -1.96 0 and 1.96 for limits ; if _n_ = 1 and &ctls=0 then b_w = b_w_nom + z * me_w_nom; if _n_ = 1 and &ctls=1 then b_w = b_w_ctl + z * me_w_ctl; if &ctls eq 1 then set win_ctl; if &ctls ne 1 then set win_nom; if male = 1 and birth = 1921 and win1_yr = 1960 then put identity Male Born_USA Birth Final Alive Films Wins Noms Film1_Yr Win1_Yr Nom1_Yr ; if (Win1_Yr ne . ) then do; age_next = (Win1_Yr - birth)+1; age = (Win1_Yr - birth); year = Win1_yr; post_win=0; s_if_Win=1; s_if_Not=1; output; do age = age_next to 110; year = year + 1; post_win = post_win + 1; if not(&ctls) then do; l_if_not = &l_w_nom ; l_if_win = l_if_not + 1*(b_w); end; if &ctls then do; l_if_not = &l_w_ctl ; l_if_win = l_if_not + 1*(b_w); end; a_if_not = 0.5 * s_if_not ; a_if_win = 0.5 * s_if_win ; s_if_not = s_if_not / ( 1 + exp(l_if_not) ); s_if_Win = s_if_win / ( 1 + exp(l_if_win) ); a_if_not = a_if_not + 0.5 * s_if_not ; a_if_win = a_if_win + 0.5 * s_if_win ; diff = a_if_Win - a_if_not; if year <= 2001 then output; if male = 1 and birth = 1921 and win1_yr = 1960 and year <= 2001 then put age year post_win s_if_not s_if_win a_if_not a_if_win diff ; end; end; run; Title "counterfactual -- what if winning didnt matter ? "; Title2 "(fitted) person years lived by winners "; proc means sum data=fitted; var a_if_Not a_if_win diff; run; proc means sum data=fitted; var a_if_Not a_if_win diff; Where( male = 1 and birth = 1921 and win1_yr = 1960); run; proc sort data=fitted ; by identity male birth age_win1; proc means sum data=fitted noprint; by identity male birth age_win1; var a_if_Not a_if_win diff; output out = indiv sum = a_if_Not a_if_win diff; run; proc univariate data=indiv; run; proc means n sum data=win_nom; var post_win; where (won = 1 ) ; run; proc sort data=fitted; by post_win; proc means mean noprint data=fitted; by post_win; var s_if_Not s_if_win ; output out = tmp sum = s_if_Not s_if_win; data summary; set tmp; s_if_Not = s_if_Not/2.38; s_if_win = s_if_win/2.38; * *** ; data ctl_L; keep post_win s_Not s_win_L; set summary; s_Not = s_if_Not; S_win_L = s_if_win; run; data ctl_P; keep post_win s_win_P; set summary; S_win_P = s_if_win; run; data ctl_U; keep post_win s_win_U; set summary; S_win_U = s_if_win; run; data ctl; merge ctl_L ctl_P ctl_U; proc plot data=ctl; plot s_Not*post_win = "0" s_win_P*post_win="P" s_win_L *post_win = "L" s_win_U*post_win="U"/ overlay; data _null_; retain count 0; file "Macintosh HD:Courses:626:Oscars:forCtls.txt" ; set ctl end=eof; if count = 0 then do; put "ctls={"; count=1; end; if not(eof) then put "{" post_win 4. "," s_Not 5.1 "," s_win_L 5.1 "," s_win_P 5.1 "," s_win_U 5.1 "}," ; if eof then put "{" post_win 4. "," s_Not 5.1 "," s_win_L 5.1 "," s_win_P 5.1 "," s_win_U 5.1 "} };" ; run; * *** ; data win_L; keep post_win s_Not s_win_L; set summary; s_Not = s_if_Not; S_win_L = s_if_win; run; data win_P; keep post_win s_win_P; set summary; S_win_P = s_if_win; run; data win_U; keep post_win s_win_U; set summary; S_win_U = s_if_win; run; data win; merge win_L win_P win_U; proc plot data=win; plot s_Not*post_win = "0" s_win_P*post_win="P" s_win_L *post_win = "L" s_win_U*post_win="U"/ overlay; run; data _null_; retain count 0; file "Macintosh HD:Courses:626:Oscars:forWins.txt" ; set win end=eof; if count = 0 then do; put "wins={"; count=1; end; if not(eof) then put "{" post_win 4. "," s_Not 5.1 "," s_win_L 5.1 "," s_win_P 5.1 "," s_win_U 5.1 "}," ; if eof then put "{" post_win 4. "," s_Not 5.1 "," s_win_L 5.1 "," s_win_P 5.1 "," s_win_U 5.1 "} };" ; run; /* Title "counterfactual -- what if winning didnt matter ? "; Title2 "survival: w: if winning mattered ; n: if it didnt "; proc plot data=curve; plot s_if_Not*post_win = "n" s_if_Win*post_win="w" / overlay; where (post_win ne .); run; Title "observed and fitted post-win curves for winners"; Title2 "survival: f: fitted if winning mattered ; o: observed "; proc lifetest data = nomin8ed noprint outsurv = o_s; time post_win * death(0); where (win1_yr ne .); data obs_s; set o_s; keep post_win survival; if _censor_ eq 0 ; proc sort data = obs_s; by post_win; proc sort data = curve; by post_win; data both; merge curve obs_s; by post_win; proc plot data=both; plot s_if_Win*post_win="f" survival*post_win="o" / overlay; where (post_win ne .); run; * ****** for Lexis **** ; proc freq data=all; tables age_last / out = freqD; where (won = 0 or . and notalive); run; data _null_; retain c 0; file "Macintosh HD:Courses:626:Oscars:freqD.txt" ; set freqD end=eof; if c = 0 then do; put "freqD={"; c=1; end; if not(eof) then put "{" age_last 3. "," count 3. "}," ; if eof then put "{" age_last 3. "," count 3. "} };" ; run; proc freq data=win_nom; tables age_win1 / out = freqW; where (won=1); run; data _null_; retain c 0 ; file "Macintosh HD:Courses:626:Oscars:freqW.txt" ; set freqW end=eof; if c = 0 then do; put "freqW={"; c=1; end; if not(eof) then put "{" age_win1 3. "," count 3. "}," ; if eof then put "{" age_win1 3. "," count 3. "} };" ; run; data a; set win_nom; r_no = ranuni(1234567); if Win1_Yr = . then Win1_yr = -1; if Nom1_Yr = . then Nom1_yr = -1; if (won=0 and age_last = 59 and noms =7 ) or (won=1 and birth = 1918 and age_last > 59 and final = 1996 ) or (age_win1 >= 79) or (won ne 1 and ( 69 <=age_last <= 69) and notalive and (1900 <= birth <= 1908) ) or (won=1 and notalive and age_last < 60 and (1912 <= birth <= 1916) ) or (won=0 and notalive and (88 <= age_last <= 97) and (1904 <= birth <= 1906) ) or (won=0 and notalive and (44 <= age_last <= 47) and (1940 <= birth <= 1944) ) or (won=0 and alive and (final = 2001) and (1934 <= birth <= 1934) and (Nom1_yr=1965));; run; proc means sum; class won; var notalive ; run; data _null_; retain count 0; file "Macintosh HD:Courses:626:Oscars:forLexis.txt" ; set a end=eof; if count = 0 then do; put "examples={"; count=1; end; if not(eof) then put "{" birth 5. "," final 5. "," film1_yr 5. "," nom1_yr 5.1 "," win1_yr 5.1 "}," ; if eof then put "{" birth 5. "," final 5. "," film1_yr 5. "," nom1_yr 5.1 "," win1_yr 5.1 "} };" ; run; */ * nov 30 ; * --------------------------------------------------------- ; title 'prob of winning as fn. of gender and time since nom' ; * probabilities saved in file haz ; * --------------------------------------------------------- ; %let ctls = 1; data target; if not(&ctls) then do; set nomin8ed; t_var = T_to_win ; z_var = win; start_y = Nom1_Yr; output; end; if &ctls then do; set won_ctl; t_var = T_f1_win ; z_var = z_f1_win; start_y = Film1_Yr; if (win=1 or ever = . ) then output ; end; run; proc sort data = target; by male ; options ls=75 ps=55; Proc lifetest data = target method = lt width=1 notable outsurv = s; by male ; time t_var *z_var(0); run; proc sort data=s; by male t_var ; data haz; keep male fh0-fh100 mh0-mh100; array haz(0:1,0:100) fh0-fh100 mh0-mh100; retain fh0-fh100 mh0-mh100; set s end = last; haz(male,t_var) = hazard; if last then output; run; * --------------------------------------------------------- ; title 'fake winners => k-m diff[ave long] pvalue[logrank]' ; * --------------------------------------------------------- ; proc sort data = target; by male; data n8ed ; keep identity male birth first_m last_m Nom1_Yr age_nom1 Final alive win age_last start_y; retain n 0; set target end=eof ; by male; first_m = 0; if first.male then first_m = 1; last_m = 0; if last.male then last_m = 1; n = n+1; if(eof) then call symput("n_ss",n); run; /* libname out "Macintosh HD:User:dad:courses:626" ; run; */ libname out "Macintosh HD:Courses:626:oscars" ; run; %let ds=%str(osc_ma1); run; data out.&ds (keep= dataset ave_f_0 ave_f_1 ave_m_0 ave_m_1 diff_f diff_m ave_diff logrnk_n logrnk_v p_value rr_mh rr_peto) tmp (keep = dataset birth Nom1_Yr alive fakewin lag age_last male already age_nom1) ; retain identity dataset count 0 fh0-fh110 mh0-mh110 first_m datasets 1000 ave_f_0 ave_f_1 ave_m_0 ave_m_1 diff_f diff_m first_m last_m ; array haz(0:1,0:110) fh0-fh110 mh0-mh110; array sum_s(0:1,0:110) sums1-sums222; array sum_r(0:1,0:110) sumr1-sumr222; array ave_long(0:1,0:1) ave_f_0 ave_f_1 ave_m_0 ave_m_1; array diff(0:1) diff_f diff_m; if count = 0 then set haz; count = count+1; do dataset = 0 to datasets; logrnk_n = 0; logrnk_v = 0; mh_num = 0 ; mh_den = 0 ; do subject = 1 to &n_ss; set n8ed point=subject; if first_m = 1 then do; do fakewin = 0 to 1; do a = 0 to 110; sum_r(fakewin,a)=0; sum_s(fakewin,a)=0; end; end; end; already = .; fakewin = 0; do year = start_y to Final ; fu = year - start_y ; if fakewin = 0 and (ranuni(6665551) < haz(male,fu) ) then do; fakewin = 1; lag = fu; end; end; if dataset > 0 then output tmp ; * skip ; /* if dataset = 0 then fakewin = win; /* observed data */ sum_r(fakewin,0) = sum_r(fakewin,0) + 1; sum_s(fakewin,0) = sum_s(fakewin,0) + 1; do a = 1 to age_last; sum_r(fakewin,a) = sum_r(fakewin,a) + 1; if a < age_last then sum_s(fakewin,a) = sum_s(fakewin,a) + 1; if a = age_last and alive = 1 then sum_s(fakewin,a) = sum_s(fakewin,a) + 1; end; if last_m = 1 then do; * kaplan-meier estimate ; do fakewin = 0 to 1; ave_long(male,fakewin)=0; p_surv = 1; do a = 1 to 110; if sum_r(fakewin,a) > 0 then do; p_surv = p_surv * sum_s(fakewin,a)/sum_r(fakewin,a); ave_long(male,fakewin) = ave_long(male,fakewin) + p_surv; end; end; end; * log rank test , mh summary rr ; do a = 1 to 110; d_cell = sum_s(1,a); b_cell = sum_s(0,a); c_cell = sum_r(1,a) - sum_s(1,a); a_cell = sum_r(0,a) - sum_s(0,a); r0 = sum_r(0,a); r1 = sum_r(1,a); t = r0 + r1; c1 = sum_s(0,a) + sum_s(1,a); c0 = t - c1; if c0 > 0 and c1 > 0 and r0 > 0 and r1 > 0 then do; logrnk_n = logrnk_n + ( a_cell - r0 * c0 / t ); logrnk_v = logrnk_v + r0*r1*c0*c1/(t*t*(t-1)); mh_num = mh_num + a_cell*d_cell/t; mh_den = mh_den + b_cell*c_cell/t; end; end; end; */ * skip ; end; /* of patients */ diff(0) = ave_long(0,1) - ave_long(0,0); diff(1) = ave_long(1,1) - ave_long(1,0); ave_diff = ( diff(0) + diff(1) ) /2 ; rr_mh = mh_den / mh_num ; rr_peto = exp(-logrnk_n/logrnk_v); p_value = 2* (1 - probnorm( abs( logrnk_n/sqrt(logrnk_v) ) ) ); output out.&ds; *put 'dataset ' dataset; end; /* of this dataset */ stop; proc means data = out.&ds ; var ave_diff rr_mh; where (dataset > 0); run; run; /* The SAS System 6 21:43 Wednesday, March 1, 2006 Variable N Mean Std Dev Minimum Maximum ---------------------------------------------------------------------- AVE_DIFF 4000 0.8003191 1.2259726 -3.2441076 5.4134766 RR_MH 4000 0.9382570 0.1126533 0.5394684 1.3865885 ---------------------------------------------------------------------- */ * recover null hr? proc sort data =tmp; by dataset; Proc phreg data = tmp noprint outest = betas ; by dataset; model age_last * alive(1) = male birth already; already = .; if (age_nom1 <= age_last) then already = 0; if (age_nom1 <= age_last) and (fakewin = 1) and ( (age_nom1 + lag) <= age_last) then already = 1; run; proc univariate data=betas; var already; run; /* Univariate Procedure Variable=ALREADY Moments N 400 Sum Wgts 400 Mean 0.018504 Sum 7.401479 Std Dev 0.113044 Variance 0.012779 Skewness -0.13605 Kurtosis -0.32624 USS 5.235799 CSS 5.098844 CV 610.9292 Std Mean 0.005652 T:Mean=0 3.273702 Pr>|T| 0.0012 Num ^= 0 400 Num > 0 228 M(Sign) 28 Pr>=|M| 0.0059 Sgn Rank 7650 Pr>=|S| 0.0009 Quantiles(Def=5) 100% Max 0.286293 99% 0.261667 75% Q3 0.099587 95% 0.196039 50% Med 0.021098 90% 0.162343 25% Q1 -0.05565 10% -0.12766 0% Min -0.28292 5% -0.17463 1% -0.25636 */ * --------------- end ---------------------------- ;