# old SAS code.. R code (2017) is further down the page data condos; input floor number area price; price = price *1000; area_flr = area*floor; log_area=log(area); lines; 1 01 925 . 1 03 661 . 1 04 1007 234 1 05 960 200 1 06 818 . 1 07 864 186 1 08 822 192 1 09 788 . 1 10 815 192 1 11 780 174 1 12 763 186 1 14 820 189 1 16 675 . 2 01 795 180 2 02 670 . 2 03 790 . 2 04 777 188 2 05 865 188 2 06 744 183 2 07 864 188 2 08 982 219 2 09 788 . 2 10 972 219 2 11 780 180 2 12 753 182 2 14 820 186 2 16 675 . 3 01 795 185 3 02 670 . 3 03 790 175 3 04 777 191 3 05 865 192 3 06 744 186 3 07 864 192 3 08 982 223 3 09 788 175 3 10 972 223 3 11 780 183 3 12 753 186 3 14 820 189 3 16 675 . 4 01 795 189 4 02 670 . 4 03 790 182 4 04 777 . 4 05 865 199 4 06 744 191 4 07 864 199 4 08 982 230 4 09 788 182 4 10 972 229 4 11 780 187 4 12 753 191 4 14 820 . 4 16 675 . 5 01 795 204 5 02 670 . 5 03 790 189 5 04 777 200 5 05 865 . 5 06 744 . 5 07 864 207 5 08 982 237 5 09 788 189 5 10 972 237 5 11 780 194 5 12 753 . 5 14 820 . 5 16 675 151 ; run; proc means data=condos; run; proc genmod data=condos; model price = floor area; where (floor > 1); run; proc genmod data=condos; model price = area area_flr / noint; where (floor > 1); run; proc genmod data=condos; model price = floor / offset=log_area link = log; where (floor > 1); run; * ********** R 2017 *************** ; #read data in from a separate fileā€¦ setwd("/Users/jameshanley/Dropbox/Courses/634/regressionRates") ds <- read.table("condoPricesDataR.txt",header=T) ds = ds[ds$etage>1 & !is.na(ds$price),] ds$extra.etage = ds$etage -2 ds$price = ds$price *1000; ds$area.extra.etage = ds$area*ds$etage; ds$log.area=log(ds$area) ## additive models # 'default' gaussian variation assumes sd(redidual) indep. of fitted mean fit.i.g=glm(price ~ -1 + area + area.extra.etage, data=ds) summary(fit.i.g) # using 'poisson' variation forces sd(redidual) = sqrt(fitted mean) # notice the 'too narrow' model-based SE's for coefficients fit.i.p=glm(price ~ -1 + area + area.extra.etage, family=poisson(link=identity), data=ds) summary(fit.i.p) # using 'quasipoisson' variation allows sd(redidual) > sqrt(fitted) ['overdispersion'] # notice the wider empirical SE's for coefficients fit.i.qp=glm(price ~ -1 + area + area.extra.etage, family=quasipoisson(link=identity), data=ds) summary(fit.i.qp) ## multiplicative models # 'default' gaussian variation assumes sd(redidual) indep. of fitted mean fit.ii.g=glm(price ~ extra.etage, offset=log(area), family=gaussian(link=log), data=ds) summary(fit.ii.g) 0.001*round(1000*exp(fit.ii.g$coefficients)) # using 'poisson' variation forces sd(redidual) = sqrt(fitted mean) # notice the 'too narrow' model-based SE's for coefficients fit.ii.p=glm(price ~ extra.etage, family=poisson, data=ds) summary(fit.ii.p) 0.001*round(1000*exp(fit.ii.p$coefficients)) # using 'quasipoisson' variation allows sd(redidual) > sqrt(fitted) ['overdispersion'] # notice the wider empirical SE's for coefficients fit.ii.qp=glm(price ~ extra.etage, family=quasipoisson, data=ds) summary(fit.ii.qp) 0.001*round(1000*exp(fit.ii.qp$coefficients)) ### N(mu,sigma) model with sigma = mu^power loglik=function(par) { b.0=par[1] b.1=par[2] power = par[3] mu = ( b.0 + b.1*ds$extra.etage ) * ds$area sigma = mu^power return( -sum(log(dnorm(ds$price,mu,sigma))) ) } initial=c(200,0,1) loglik(initial) fit=optim(initial, loglik,method="BFGS",hessian=TRUE) fit par = fit$par