# change working directory [may also be able to do via menu] setwd("path") library(survival) # dataset, which I will call ds ds <- read.table("phdDataForR.txt",header=TRUE) # also wish to use variables without pointing to dataset attach(ds) # fit and plot k-m curve (overall) fit <- survfit(Surv(duration, phd), data=ds) plot(fit) #see documentaion for how to add labels etc to plot # fit and plot k-m curve (overall) fit.one <- survfit(Surv(duration, phd), data=ds) # [ same as fit <- survfit(Surv(duration, phd) ~ 1, data=ds) ] plot(fit.one) # looks at structure / cntents of fit.one (note n.risk, n.event and time for later) str(fit.one) # create two cohorts and fit and plot curves early = yr.entry < 1990 fit.two <- survfit(Surv(duration, phd) ~ early, data=ds) plot(fit.one) #see documentaion for how to add labels etc to plot #S[t] table summary(fit.two) summary(fit.two) ###################### Nelson-Aalen # FROM FIRST PRINCIPLES (IE FROM DEFINITION) # use n.risk and n.event to get (non zero portions of) hazard function h = fit.one$n.event/fit.one$n.risk h # compute integral (ie cumulative sums ) of h .. cumsum is general H = cumsum(h) H # S.hat = exp[ - integral ] S.hat = exp( - H ) S.hat # plot N-A curve, and its complement year = fit.one$time plot(year,S.hat) # plot its complement graduated = 1 - S.hat plot(year,graduated) # USING coxph model with no covariates (NA is default ) # learned by googling nelson-aalen R bh = basehaz(coxph(Surv(duration,phd)~1)) ; bh ########### comparing curves ############### survdiff(Surv(duration, phd) ~ early)