# old SAS code..  R code (2017) is further down the page

data condos;
input floor number area price;
price = price *1000;
area_flr = area*floor; log_area=log(area);
lines;

1 01 925   .
1 03 661   .
1 04 1007 234
1 05 960 200
1 06 818   .
1 07 864 186
1 08 822 192
1 09 788   .
1 10 815 192
1 11 780 174
1 12 763 186
1 14 820 189
1 16 675   .

2 01 795 180
2 02 670   .
2 03 790   .
2 04 777 188
2 05 865 188
2 06 744 183
2 07 864 188
2 08 982 219
2 09 788   .
2 10 972 219
2 11 780 180
2 12 753 182
2 14 820 186
2 16 675   .

3 01 795 185
3 02 670   .
3 03 790 175
3 04 777 191
3 05 865 192
3 06 744 186
3 07 864 192
3 08 982 223
3 09 788 175
3 10 972 223
3 11 780 183
3 12 753 186
3 14 820 189
3 16 675   .


4 01 795 189
4 02 670   .
4 03 790 182
4 04 777   .
4 05 865 199
4 06 744 191
4 07 864 199
4 08 982 230
4 09 788 182
4 10 972 229
4 11 780 187
4 12 753 191
4 14 820   .
4 16 675   .

5 01 795 204
5 02 670   .
5 03 790 189
5 04 777 200
5 05 865   .
5 06 744   .
5 07 864 207
5 08 982 237
5 09 788 189
5 10 972 237
5 11 780 194
5 12 753   .
5 14 820   .
5 16 675 151
;
run;

proc means data=condos; run;

proc genmod  data=condos;
 model price = floor area;
 where (floor > 1);
run;

proc genmod  data=condos;
 model price =  area area_flr / noint;
 where (floor > 1);
run;

proc genmod  data=condos;
 model price =  floor / offset=log_area link = log;
 where (floor > 1);
run;


* ********** R 2017  *************** ;

#read data in from a separate file…

setwd("/Users/jameshanley/Dropbox/Courses/634/regressionRates")


ds <- read.table("condoPricesDataR.txt",header=T)
ds = ds[ds$etage>1 & !is.na(ds$price),]
ds$extra.etage = ds$etage -2
ds$price = ds$price *1000; 
ds$area.extra.etage = ds$area*ds$etage;
ds$log.area=log(ds$area)

## additive models

   # 'default' gaussian variation assumes sd(redidual) indep. of fitted mean

   fit.i.g=glm(price ~ -1 + area + area.extra.etage,
   data=ds)
   summary(fit.i.g)

   # using 'poisson' variation forces sd(redidual) = sqrt(fitted mean)
   # notice the 'too narrow' model-based SE's for coefficients

   fit.i.p=glm(price ~ -1 + area + area.extra.etage,
      family=poisson(link=identity),
      data=ds)
   summary(fit.i.p)

   # using 'quasipoisson' variation allows sd(redidual) > sqrt(fitted) ['overdispersion']
   # notice the wider empirical SE's for coefficients

   fit.i.qp=glm(price ~ -1 + area + area.extra.etage,
    family=quasipoisson(link=identity),
   data=ds)
   summary(fit.i.qp)
   
## multiplicative models

   # 'default' gaussian variation assumes sd(redidual) indep. of fitted mean

   fit.ii.g=glm(price ~ extra.etage, offset=log(area),
    family=gaussian(link=log), data=ds)
   summary(fit.ii.g)
   0.001*round(1000*exp(fit.ii.g$coefficients))

   # using 'poisson' variation forces sd(redidual) = sqrt(fitted mean)
   # notice the 'too narrow' model-based SE's for coefficients

   fit.ii.p=glm(price ~ extra.etage, family=poisson,
   data=ds)
   summary(fit.ii.p)
   0.001*round(1000*exp(fit.ii.p$coefficients))

   # using 'quasipoisson' variation allows sd(redidual) > sqrt(fitted) ['overdispersion']
   # notice the wider empirical SE's for coefficients

   fit.ii.qp=glm(price ~ extra.etage, 
    family=quasipoisson, data=ds)
   summary(fit.ii.qp)
   0.001*round(1000*exp(fit.ii.qp$coefficients))
   
   
###  N(mu,sigma) model with sigma = mu^power

loglik=function(par) {
	b.0=par[1]
	b.1=par[2]
	power = par[3]
	mu = ( b.0 + b.1*ds$extra.etage ) * ds$area
	sigma = mu^power
	return( -sum(log(dnorm(ds$price,mu,sigma))) )
}
initial=c(200,0,1)
loglik(initial)

fit=optim(initial, loglik,method="BFGS",hessian=TRUE)
fit
par = fit$par