categorical Chris Parrish June 7, 2016 Contents Waffle House 2 data..................................................... 3 exploratory data analysis......................................... 3 Waffle Houses............................................. 3 Marriage.s.............................................. 4 MedianAgeMarriage.s....................................... 5 MedianAgeMarriage.s and Marriage.s.............................. 6 model divorce rate ~ age at marriage................................... 7 map m5.1............................................... 7 model divorce rate ~ marriage rate..................................... 9 map m5.2............................................... 9 model divorce rate ~ age at marriage and marriage rate......................... 10 map m5.3............................................... 10 model marriage rate ~ age at marriage.................................. 11 map m5.4............................................... 11 plotting multivariate posteriors...................................... 12 predictor residual plots........................................ 12 counterfactual plots......................................... 13 posterior prediction plots...................................... 16 simulating spurious association................................... 18 milk 19 data..................................................... 20 map..................................................... 20 m5.5 with NA............................................. 20 m5.5 complete cases......................................... 20 m5.6.................................................. 22 m5.7.................................................. 22 legs 24 data..................................................... 24 exploratory data analysis......................................... 25 model.................................................... 26 map..................................................... 
26 parameter distributions.......................................... 27 samples................................................... 30 parameter distributions.......................................... 32 milk 35 data..................................................... 35 exploratory data analysis......................................... 35 model.................................................... 38 map..................................................... 38 parameter distributions....................................... 39 plot parameter distribution..................................... 39 1
map..................................................... 40 plot parameter distribution..................................... 41 map..................................................... 42 plot parameter distributions..................................... 43 correlated predictor variables....................................... 45 plants 46 data..................................................... 46 exploratory data analysis......................................... 47 map..................................................... 48 parameter distributions....................................... 49 plot parameter distributions..................................... 49 map..................................................... 49 plot parameter distributions..................................... 50 Dobe!Kung 52 data..................................................... 52 exploratory data analysis......................................... 52 model.................................................... 53 map.................................................. 53 PI for male height.......................................... 54 reparametrize the model.......................................... 54 model................................................. 54 map.................................................. 54 parameter distributions....................................... 55 milk..................................................... 55 data.................................................. 55 map.................................................. 56 analysis................................................ 56 unique intercepts........................................... 
57 lm 58 categorical references: - McElreath, Statistical Rethinking, chap 5, pp.119-164 - !Kung people - images of !Kung people Waffle House library(rethinking) ## Loading required package: rstan ## Loading required package: ggplot2 ## rstan (Version 2.9.0-3, packaged: 2016-02-11 15:54:41 UTC, GitRev: 05c3d0058b6a) ## For execution on a local, multicore CPU with excess RAM we recommend calling ## rstan_options(auto_write = TRUE) ## options(mc.cores = parallel::detectCores()) ## Loading required package: parallel 2
## rethinking (Version 1.58) library(ggplot2) data ## R code 5.1 # load data data(WaffleDivorce) d <- WaffleDivorce str(d) ## 'data.frame': 50 obs. of 13 variables: ## $ Location : Factor w/ 50 levels "Alabama","Alaska",..: 1 2 3 4 5 6 7 8 9 10... ## $ Loc : Factor w/ 50 levels "AK","AL","AR",..: 2 1 4 3 5 6 7 9 8 10... ## $ Population : num 4.78 0.71 6.33 2.92 37.25... ## $ MedianAgeMarriage: num 25.3 25.2 25.8 24.3 26.8 25.7 27.6 26.6 29.7 26.4... ## $ Marriage : num 20.2 26 20.3 26.4 19.1 23.5 17.1 23.1 17.7 17... ## $ Marriage.SE : num 1.27 2.93 0.98 1.7 0.39 1.24 1.06 2.89 2.53 0.58... ## $ Divorce : num 12.7 12.5 10.8 13.5 8 11.6 6.7 8.9 6.3 8.5... ## $ Divorce.SE : num 0.79 2.05 0.74 1.22 0.24 0.94 0.77 1.39 1.89 0.32... ## $ WaffleHouses : int 128 0 18 41 0 11 0 3 0 133... ## $ South : int 1 0 0 1 0 0 0 0 0 1... ## $ Slaves1860 : int 435080 0 0 111115 0 0 0 1798 0 61745... ## $ Population1860 : int 964201 0 0 435450 379994 34277 460147 112216 75080 140424... ## $ PropSlaves1860 : num 0.45 0 0 0.26 0 0 0 0.016 0 0.44... head(d) ## Location Loc Population MedianAgeMarriage Marriage Marriage.SE Divorce ## 1 Alabama AL 4.78 25.3 20.2 1.27 12.7 ## 2 Alaska AK 0.71 25.2 26.0 2.93 12.5 ## 3 Arizona AZ 6.33 25.8 20.3 0.98 10.8 ## 4 Arkansas AR 2.92 24.3 26.4 1.70 13.5 ## 5 California CA 37.25 26.8 19.1 0.39 8.0 ## 6 Colorado CO 5.03 25.7 23.5 1.24 11.6 ## Divorce.SE WaffleHouses South Slaves1860 Population1860 PropSlaves1860 ## 1 0.79 128 1 435080 964201 0.45 ## 2 2.05 0 0 0 0 0.00 ## 3 0.74 18 0 0 0 0.00 ## 4 1.22 41 1 111115 435450 0.26 ## 5 0.24 0 0 0 379994 0.00 ## 6 0.94 11 0 0 34277 0.00 exploratory data analysis Waffle Houses ggplot(d, aes(x = WaffleHouses / Population, y = Divorce)) + geom_point(shape = 20, color = "darkred") + geom_smooth(method = "lm") + labs(x = "Waffle Houses per million", y = "Divorce rate") 3
14 12 Divorce rate 10 8 6 0 10 20 30 40 Waffle Houses per million Marriage.s Standardize marriage. d$Marriage.s <- (d$Marriage - mean(d$Marriage))/sd(d$Marriage) ggplot(d, aes(Marriage.s, Divorce)) + geom_point(aes(x = Marriage.s, y = Divorce), shape = 20, color = "darkred") + geom_smooth(method = "lm") + labs(x = "Marriage.s", y = "Divorce") 4
12 Divorce 10 8 6 1 0 1 2 3 Marriage.s MedianAgeMarriage.s Standardize median age at marriage. # standardize predictor d$MedianAgeMarriage.s <- (d$MedianAgeMarriage-mean(d$MedianAgeMarriage))/ sd(d$MedianAgeMarriage) ggplot(d, aes(MedianAgeMarriage.s, Divorce)) + geom_point(aes(x = MedianAgeMarriage.s, y = Divorce), shape = 20, color = "darkred") + geom_smooth(method = "lm") + labs(x = "MedianAgeMarriage.s", y = "Divorce") 5
12 Divorce 10 8 6 2 1 0 1 2 3 MedianAgeMarriage.s MedianAgeMarriage.s and Marriage.s How are marriage rate and median age at marriage related? ggplot(d, aes(MedianAgeMarriage.s, Marriage.s)) + geom_point(shape = 20, color = "darkred") + geom_smooth(method = "lm") + labs(x = "MedianAgeMarriage.s", y = "Marriage.s") 6
3 2 1 Marriage.s 0 1 2 3 2 1 0 1 2 3 MedianAgeMarriage.s model divorce rate ~ age at marriage. $D_i \sim \mathrm{Normal}(\mu_i, \sigma)$, $\mu_i = \alpha + \beta_A A_i$, $\alpha \sim \mathrm{Normal}(10, 10)$, $\beta_A \sim \mathrm{Normal}(0, 1)$, $\sigma \sim \mathrm{Uniform}(0, 10)$ map m5.1 # fit model m5.1 <- map( alist( Divorce ~ dnorm( mu, sigma ), mu <- a + ba * MedianAgeMarriage.s, a ~ dnorm( 10, 10 ), ba ~ dnorm( 0, 1 ), sigma ~ dunif( 0, 10 ) ), data = d ) precis(m5.1) ## Mean StdDev 5.5% 94.5% ## a 9.69 0.20 9.36 10.02 7
## ba -1.04 0.20 -1.37 -0.72 ## sigma 1.45 0.14 1.22 1.68 plot( precis(m5.1) ) a ba sigma 0 2 4 6 8 10 Value ## R code 5.2 # compute percentile interval of mean MAM.seq <- seq( from=-3, to=3.5, length.out=30 ) mu <- link( m5.1, data=data.frame(MedianAgeMarriage.s=MAM.seq) ) ## [ 100 / 1000 ] [ 200 / 1000 ] [ 300 / 1000 ] [ 400 / 1000 ] [ 500 / 1000 ] [ 600 / 1000 ] [ 700 / 1000 ] [ 800 / 1000 ] [ 900 / 1000 ] [ 1000 / 1000 ] mu.pi <- apply( mu, 2, PI ) # plot it all plot( Divorce ~ MedianAgeMarriage.s, data=d, col=rangi2 ) abline( m5.1 ) ## Warning in abline(m5.1): only using the first two of 3 regression ## coefficients shade( mu.pi, MAM.seq ) 8
Divorce 6 8 10 12 2 1 0 1 2 3 MedianAgeMarriage.s model divorce rate ~ marriage rate. $D_i \sim \mathrm{Normal}(\mu_i, \sigma)$, $\mu_i = \alpha + \beta_R R_i$, $\alpha \sim \mathrm{Normal}(10, 10)$, $\beta_R \sim \mathrm{Normal}(0, 1)$, $\sigma \sim \mathrm{Uniform}(0, 10)$ map m5.2 ## R code 5.3 d$Marriage.s <- (d$Marriage - mean(d$Marriage))/sd(d$Marriage) m5.2 <- map( alist( Divorce ~ dnorm( mu, sigma ), mu <- a + br * Marriage.s, a ~ dnorm( 10, 10 ), br ~ dnorm( 0, 1 ), sigma ~ dunif( 0, 10 ) ), data = d ) precis(m5.2) ## Mean StdDev 5.5% 94.5% ## a 9.69 0.24 9.31 10.07 ## br 0.64 0.23 0.27 1.02 ## sigma 1.67 0.17 1.40 1.94 plot( precis(m5.2) ) 9
a br sigma 0 2 4 6 8 10 Value model divorce rate ~ age at marriage and marriage rate. $D_i \sim \mathrm{Normal}(\mu_i, \sigma)$, $\mu_i = \alpha + \beta_R R_i + \beta_A A_i$, $\alpha \sim \mathrm{Normal}(10, 10)$, $\beta_R \sim \mathrm{Normal}(0, 1)$, $\beta_A \sim \mathrm{Normal}(0, 1)$, $\sigma \sim \mathrm{Uniform}(0, 10)$ map m5.3 ## R code 5.4 m5.3 <- map( alist( Divorce ~ dnorm( mu, sigma ), mu <- a + br*Marriage.s + ba*MedianAgeMarriage.s, a ~ dnorm( 10, 10 ), br ~ dnorm( 0, 1 ), ba ~ dnorm( 0, 1 ), sigma ~ dunif( 0, 10 ) ), data = d ) precis( m5.3 ) ## Mean StdDev 5.5% 94.5% ## a 9.69 0.20 9.36 10.01 ## br -0.13 0.28 -0.58 0.31 ## ba -1.13 0.28 -1.58 -0.69 ## sigma 1.44 0.14 1.21 1.67 ## R code 5.5 plot( precis(m5.3) ) 10
a br ba sigma 2 0 2 4 6 8 10 Value model marriage rate ~ age at marriage map m5.4 ## R code 5.6 m5.4 <- map( alist( Marriage.s ~ dnorm( mu, sigma ), mu <- a + b*MedianAgeMarriage.s, a ~ dnorm( 0, 10 ), b ~ dnorm( 0, 1 ), sigma ~ dunif( 0, 10 ) ), data = d ) precis(m5.4) ## Mean StdDev 5.5% 94.5% ## a 0.00 0.10 -0.16 0.16 ## b -0.71 0.10 -0.87 -0.56 ## sigma 0.69 0.07 0.58 0.80 plot( precis(m5.4) ) 11
a b sigma 0.5 0.0 0.5 Value plotting multivariate posteriors predictor residual plots ## R code 5.7 # compute expected value at MAP, for each State mu <- coef(m5.4)['a'] + coef(m5.4)['b']*d$MedianAgeMarriage.s # compute residual for each State m.resid <- d$Marriage.s - mu ## R code 5.8 plot( Marriage.s ~ MedianAgeMarriage.s, d, col=rangi2 ) abline( m5.4 ) ## Warning in abline(m5.4): only using the first two of 3 regression ## coefficients # loop over States for ( i in 1:length(m.resid) ) { x <- d$MedianAgeMarriage.s[i] # x location of line segment y <- d$Marriage.s[i] # observed endpoint of line segment # draw the line segment lines( c(x,x), c(mu[i],y), lwd=0.5, col=col.alpha("black",0.7) ) } 12
Marriage.s 1 0 1 2 2 1 0 1 2 3 MedianAgeMarriage.s counterfactual plots ## R code 5.9 # prepare new counterfactual data A.avg <- mean( d$MedianAgeMarriage.s ) R.seq <- seq( from=-3, to=3, length.out=30 ) pred.data <- data.frame( Marriage.s=R.seq, MedianAgeMarriage.s=A.avg ) # compute counterfactual mean divorce (mu) mu <- link( m5.3, data=pred.data ) ## [ 100 / 1000 ] [ 200 / 1000 ] [ 300 / 1000 ] [ 400 / 1000 ] [ 500 / 1000 ] [ 600 / 1000 ] [ 700 / 1000 ] [ 800 / 1000 ] [ 900 / 1000 ] [ 1000 / 1000 ] mu.mean <- apply( mu, 2, mean ) mu.pi <- apply( mu, 2, PI ) # simulate counterfactual divorce outcomes R.sim <- sim( m5.3, data=pred.data, n=1e4 ) ## [ 1000 / 10000 ] 13
[ 2000 / 10000 ] [ 3000 / 10000 ] [ 4000 / 10000 ] [ 5000 / 10000 ] [ 6000 / 10000 ] [ 7000 / 10000 ] [ 8000 / 10000 ] [ 9000 / 10000 ] [ 10000 / 10000 ] R.PI <- apply( R.sim, 2, PI ) # display predictions, hiding raw data with type="n" plot( Divorce ~ Marriage.s, data=d, type="n" ) mtext( "MedianAgeMarriage.s = 0" ) lines( R.seq, mu.mean ) shade( mu.pi, R.seq ) shade( R.PI, R.seq ) MedianAgeMarriage.s = 0 Divorce 6 8 10 12 1 0 1 2 Marriage.s ## R code 5.10 R.avg <- mean( d$Marriage.s ) A.seq <- seq( from=-3, to=3.5, length.out=30 ) pred.data2 <- data.frame( Marriage.s=R.avg, MedianAgeMarriage.s=A.seq ) mu <- link( m5.3, data=pred.data2 ) ## [ 100 / 1000 ] [ 200 / 1000 ] [ 300 / 1000 ] [ 400 / 1000 ] 14
[ 500 / 1000 ] [ 600 / 1000 ] [ 700 / 1000 ] [ 800 / 1000 ] [ 900 / 1000 ] [ 1000 / 1000 ] mu.mean <- apply( mu, 2, mean ) mu.pi <- apply( mu, 2, PI ) A.sim <- sim( m5.3, data=pred.data2, n=1e4 ) ## [ 1000 / 10000 ] [ 2000 / 10000 ] [ 3000 / 10000 ] [ 4000 / 10000 ] [ 5000 / 10000 ] [ 6000 / 10000 ] [ 7000 / 10000 ] [ 8000 / 10000 ] [ 9000 / 10000 ] [ 10000 / 10000 ] A.PI <- apply( A.sim, 2, PI ) plot( Divorce ~ MedianAgeMarriage.s, data=d, type="n" ) mtext( "Marriage.s = 0" ) lines( A.seq, mu.mean ) shade( mu.pi, A.seq ) shade( A.PI, A.seq ) Marriage.s = 0 Divorce 6 8 10 12 2 1 0 1 2 3 MedianAgeMarriage.s 15
posterior prediction plots ## R code 5.11 # call link without specifying new data # so it uses original data mu <- link( m5.3 ) ## [ 100 / 1000 ] [ 200 / 1000 ] [ 300 / 1000 ] [ 400 / 1000 ] [ 500 / 1000 ] [ 600 / 1000 ] [ 700 / 1000 ] [ 800 / 1000 ] [ 900 / 1000 ] [ 1000 / 1000 ] # summarize samples across cases mu.mean <- apply( mu, 2, mean ) mu.pi <- apply( mu, 2, PI ) # simulate observations # again no new data, so uses original data divorce.sim <- sim( m5.3, n=1e4 ) ## [ 1000 / 10000 ] [ 2000 / 10000 ] [ 3000 / 10000 ] [ 4000 / 10000 ] [ 5000 / 10000 ] [ 6000 / 10000 ] [ 7000 / 10000 ] [ 8000 / 10000 ] [ 9000 / 10000 ] [ 10000 / 10000 ] divorce.pi <- apply( divorce.sim, 2, PI ) ## R code 5.12 plot( mu.mean ~ d$Divorce, col=rangi2, ylim=range(mu.pi), xlab="Observed divorce", ylab="Predicted divorce" ) abline( a=0, b=1, lty=2 ) for ( i in 1:nrow(d) ) lines( rep(d$Divorce[i],2), c(mu.pi[1,i],mu.pi[2,i]), col=rangi2 ) ## R code 5.13 identify( x=d$Divorce, y=mu.mean, labels=d$Loc, cex=0.8 ) 16
Predicted divorce 6 8 10 12 ## integer(0) 6 8 10 12 Observed divorce ## R code 5.14 # compute residuals divorce.resid <- d$Divorce - mu.mean # get ordering by divorce rate o <- order(divorce.resid) # make the plot dotchart( divorce.resid[o], labels=d$Loc[o], xlim=c(-6,5), cex=0.6 ) abline( v=0, col=col.alpha("black",0.2) ) for ( i in 1:nrow(d) ) { j <- o[i] # which State in order lines( d$Divorce[j]-c(mu.pi[1,j],mu.pi[2,j]), rep(i,2) ) points( d$Divorce[j]-c(divorce.pi[1,j],divorce.pi[2,j]), rep(i,2), pch=3, cex=0.6, col="gray" ) } 17
ME AR AL AK KY GA OK CO RI LA MS NH IN TN AZ SD OR VT WV NM WA MA MD KS IA OH DC NC DE MI HI TX VA MO WY IL MT FL CA NY PA WI SC NE CT UT ND MN NJ ID 6 4 2 0 2 4 simulating spurious association ## R code 5.15 N <- 100 # number of cases x_real <- rnorm( N ) # x_real as Gaussian with mean 0 and stddev 1 x_spur <- rnorm( N, x_real ) # x_spur as Gaussian with mean=x_real y <- rnorm( N, x_real ) # y as Gaussian with mean=x_real d <- data.frame(y,x_real,x_spur) # bind all together in data frame pairs(~ y + x_real + x_spur, data=d, col="darkred") 18
2 1 0 1 2 y 3 1 1 3 2 0 1 2 x_real x_spur 3 1 1 3 3 2 1 0 1 2 3 3 2 1 0 1 2 3 demo.lm <- lm(y ~ x_real + x_spur, data=d) options(show.signif.stars=FALSE) summary(demo.lm) ## ## Call: ## lm(formula = y ~ x_real + x_spur, data = d) ## ## Residuals: ## Min 1Q Median 3Q Max ## -2.12762 -0.57146 0.07675 0.63496 2.56756 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) -0.08391 0.09506 -0.883 0.3796 ## x_real 1.06984 0.12303 8.696 8.74e-14 ## x_spur -0.16181 0.08652 -1.870 0.0645 ## ## Residual standard error: 0.942 on 97 degrees of freedom ## Multiple R-squared: 0.5124, Adjusted R-squared: 0.5024 ## F-statistic: 50.98 on 2 and 97 DF, p-value: 7.398e-16 milk library(rethinking) 19
data ## R code 5.16 data(milk) d <- milk str(d) ## 'data.frame': 29 obs. of 8 variables: ## $ clade : Factor w/ 4 levels "Ape","New World Monkey",..: 4 4 4 4 4 2 2 2 2 2... ## $ species : Factor w/ 29 levels "A palliata","alouatta seniculus",..: 11 8 9 10 16 2 1 6 28 27 ## $ kcal.per.g : num 0.49 0.51 0.46 0.48 0.6 0.47 0.56 0.89 0.91 0.92... ## $ perc.fat : num 16.6 19.3 14.1 14.9 27.3... ## $ perc.protein : num 15.4 16.9 16.9 13.2 19.5... ## $ perc.lactose : num 68 63.8 69 71.9 53.2... ## $ mass : num 1.95 2.09 2.51 1.62 2.19 5.25 5.37 2.51 0.71 0.68... ## $ neocortex.perc: num 55.2 NA NA NA NA... map m5.5 with NA Problem here with incomplete cases. ## R code 5.17 # m5.5 <- map( # alist( # kcal.per.g ~ dnorm( mu, sigma ), # mu <- a + bn*neocortex.perc, # a ~ dnorm( 0, 100 ), # bn ~ dnorm( 0, 1 ), # sigma ~ dunif( 0, 1 ) # ), # data=d ) ## R code 5.18 d$neocortex.perc ## [1] 55.16 NA NA NA NA 64.54 64.54 67.64 NA 68.85 58.85 ## [12] 61.69 60.32 NA NA 69.97 NA 70.41 NA 73.40 NA 67.53 ## [23] NA 71.26 72.60 NA 70.24 76.30 75.49 m5.5 complete cases ## R code 5.19 dcc <- d[ complete.cases(d), ] a.start <- mean(dcc$kcal.per.g) bn.start <- 0 ## R code 5.20 m5.5 <- map( alist( kcal.per.g ~ dnorm( mu, sigma ), 20
mu <- a + bn*neocortex.perc, a ~ dnorm( 0, 100 ), bn ~ dnorm( 0, 1 ), sigma ~ dunif( 0, 1 ) ), data = dcc, start = list(a=a.start, bn=bn.start)) ## R code 5.21 precis( m5.5, digits=3 ) ## Mean StdDev 5.5% 94.5% ## a 0.353 0.471-0.399 1.106 ## bn 0.005 0.007-0.007 0.016 ## sigma 0.166 0.028 0.120 0.211 ## R code 5.22 coef(m5.5)["bn"] * ( 76-55 ) ## bn ## 0.09456727 ## R code 5.23 np.seq <- 0:100 pred.data <- data.frame( neocortex.perc=np.seq ) mu <- link( m5.5, data=pred.data, n=1e4 ) ## [ 1000 / 10000 ] [ 2000 / 10000 ] [ 3000 / 10000 ] [ 4000 / 10000 ] [ 5000 / 10000 ] [ 6000 / 10000 ] [ 7000 / 10000 ] [ 8000 / 10000 ] [ 9000 / 10000 ] [ 10000 / 10000 ] mu.mean <- apply( mu, 2, mean ) mu.pi <- apply( mu, 2, PI ) plot( kcal.per.g ~ neocortex.perc, data=dcc, col=rangi2 ) lines( np.seq, mu.mean ) lines( np.seq, mu.pi[1,], lty=2 ) lines( np.seq, mu.pi[2,], lty=2 ) 21
kcal.per.g 0.5 0.6 0.7 0.8 0.9 55 60 65 70 75 ## R code 5.24 dcc$log.mass <- log(dcc$mass) neocortex.perc m5.6 ## R code 5.25 m5.6 <- map( alist( kcal.per.g ~ dnorm( mu, sigma ), mu <- a + bm*log.mass, a ~ dnorm( 0, 100 ), bm ~ dnorm( 0, 1 ), sigma ~ dunif( 0, 1 ) ), data=dcc ) precis(m5.6) ## Mean StdDev 5.5% 94.5% ## a 0.71 0.05 0.63 0.78 ## bm -0.03 0.02-0.06 0.00 ## sigma 0.16 0.03 0.11 0.20 m5.7 Bad start value (1) ## R code 5.26 m5.7 <- map( alist( kcal.per.g ~ dnorm( mu, sigma ), mu <- a + bn*neocortex.perc + bm*log.mass, 22
a ~ dnorm( 0, 100 ), bn ~ dnorm( 0, 1 ), bm ~ dnorm( 0, 1 ), sigma ~ dunif( 0, 1 ) ), data=dcc ) precis(m5.7) ## Mean StdDev 5.5% 94.5% ## a -1.08 0.47-1.83-0.34 ## bn 0.03 0.01 0.02 0.04 ## bm -0.10 0.02-0.13-0.06 ## sigma 0.11 0.02 0.08 0.15 ## R code 5.27 mean.log.mass <- mean( log(dcc$mass) ) np.seq <- 0:100 pred.data <- data.frame( neocortex.perc=np.seq, log.mass=mean.log.mass ) mu <- link( m5.7, data=pred.data, n=1e4 ) ## [ 1000 / 10000 ] [ 2000 / 10000 ] [ 3000 / 10000 ] [ 4000 / 10000 ] [ 5000 / 10000 ] [ 6000 / 10000 ] [ 7000 / 10000 ] [ 8000 / 10000 ] [ 9000 / 10000 ] [ 10000 / 10000 ] mu.mean <- apply( mu, 2, mean ) mu.pi <- apply( mu, 2, PI ) plot( kcal.per.g ~ neocortex.perc, data=dcc, type="n" ) lines( np.seq, mu.mean ) lines( np.seq, mu.pi[1,], lty=2 ) lines( np.seq, mu.pi[2,], lty=2 ) 23
kcal.per.g 0.5 0.6 0.7 0.8 0.9 55 60 65 70 75 neocortex.perc legs library(rethinking) library(ggplot2) library(cowplot) data Correlated predictors. ## R code 5.28 N <- 100 # number of cases rho <- 0.7 # correlation btw x_pos and x_neg x_pos <- rnorm( N ) # x_pos as Gaussian x_neg <- rnorm( N, rho*x_pos, sqrt(1-rho^2) ) # x_neg correlated with x_pos y <- rnorm( N, x_pos - x_neg ) # y equally associated with x_pos, x_neg d <- data.frame(y,x_pos,x_neg) # bind all together in data frame Legs ## R code 5.29 N <- 100 # number of individuals height <- rnorm(N,10,2) # sim total height of each leg_prop <- runif(N,0.4,0.5) # leg as proportion of height leg_left <- leg_prop*height + rnorm( N, 0, 0.02 ) # sim left leg as proportion + error leg_right <- leg_prop*height + rnorm( N, 0, 0.02 ) # sim right leg as proportion + error # combine into data frame 24
d <- data.frame(height,leg_left,leg_right) str(d) ## 'data.frame': 100 obs. of 3 variables: ## $ height : num 8.79 11.49 7.45 7.01 11.21... ## $ leg_left : num 4.37 5.03 3.31 2.84 5.51... ## $ leg_right: num 4.34 5.02 3.36 2.79 5.51... exploratory data analysis ggplot(d, aes(leg_left, leg_right)) + geom_point(shape = 20, color = "darkred") + theme_gray() 6 5 leg_right 4 3 3 4 5 6 leg_left ggplot(d, aes(leg_left, height)) + geom_point(shape = 20, color = "darkred") + geom_smooth() + theme_gray() 25
12.5 height 10.0 7.5 5.0 3 4 5 6 leg_left model $h_i \sim \mathrm{Normal}(\mu_i, \sigma)$, $\mu_i = \alpha + \beta_l \, \mathrm{leg}_l + \beta_r \, \mathrm{leg}_r$, $\alpha \sim \mathrm{Normal}(10, 100)$, $\beta_l \sim \mathrm{Normal}(2, 10)$, $\beta_r \sim \mathrm{Normal}(2, 10)$, $\sigma \sim \mathrm{Uniform}(0, 10)$ map Start values problem (4) ## R code 5.30 m5.8 <- map( alist( height ~ dnorm( mu, sigma ), mu <- a + bl*leg_left + br*leg_right, a ~ dnorm( 10, 100 ), bl ~ dnorm( 2, 10 ), br ~ dnorm( 2, 10 ), sigma ~ dunif( 0, 10 ) ), 26
data=d ) precis(m5.8) ## Mean StdDev 5.5% 94.5% ## a 1.22 0.25 0.82 1.61 ## bl 2.64 1.82-0.27 5.54 ## br -0.69 1.82-3.60 2.21 ## sigma 0.55 0.04 0.49 0.62 ## R code 5.31 plot(precis(m5.8)) a bl br sigma 2 0 2 4 Value parameter distributions Plot parameter distributions # extract samples post <- extract.samples( m5.8 ) str(post) ## 'data.frame': 10000 obs. of 4 variables: ## $ a : num 1.155 0.96 0.933 1.689 1.387... ## $ bl : num 5.5 2.3 2.45 4.71 4.26... ## $ br : num -3.53-0.304-0.453-2.878-2.365... ## $ sigma: num 0.585 0.555 0.578 0.5 0.533... # plot each parameter distribution a.plot <- parameter.dist(parameter = "a", values = post$a) bl.plot <- parameter.dist(parameter = "bl", values = post$bl) 27
br.plot <- parameter.dist(parameter = "br", values = post$br) sigma.plot <- parameter.dist(parameter = "sigma", values = post$sigma) # display parameter distributions plot_grid(a.plot, bl.plot, br.plot, sigma.plot, labels=c("a", "bl", "br", "sigma"), ncol = 2, nrow = 2) 28
a 1.5 bl 0.20 density 1.0 0.5 density 0.15 0.10 0.05 0.0 0.5 1.0 1.5 2.0 a 0.00 5 0 5 bl 0.2 0.2 0.0 0.2 0.4 0.828 1.216 1.621 HPDI mean HPDI 0.0 0.2 0.4 0.358 2.63 5.501 HPDI mean HPDI 0.75 1.00 1.25 1.50 1.75 a 0 2 4 6 bl br sigma 0.20 9 0.15 density 0.10 density 6 0.05 3 0.00 0 4 0 4 br 0.4 0.5 0.6 0.7 sigma 0.2 0.2 0.0 0.2 0.4 3.563 0.686 2.296 HPDI mean HPDI 0.0 0.2 0.4 0.489 0.552 0.613 HPDI mean HPDI 4 2 0 2 br 0.50 0.55 0.60 sigma 29
samples ## R code 5.32 post <- extract.samples(m5.8) str(post) ## 'data.frame': 10000 obs. of 4 variables: ## $ a : num 1.06 1.32 1.58 1.05 1.31... ## $ bl : num 2.01 1.43 1.68 4.25 5.32... ## $ br : num -0.0435 0.471 0.1817-2.2573-3.4045... ## $ sigma: num 0.559 0.52 0.546 0.586 0.593... plot( bl ~ br, post, col=col.alpha(rangi2,0.1), pch=16 ) bl 5 0 5 10 ggplot(post, aes(bl, br)) + geom_point(shape = 20, color = "darkred") + theme_gray() 5 0 5 br 30
4 0 br 4 8 5 0 5 10 bl ## R code 5.33 sum_blbr <- post$bl + post$br dens( sum_blbr, col=rangi2, lwd=2, xlab="sum of bl and br" ) 31
Density 0 1 2 3 4 5 6 7 Start problem (2) 1.8 1.9 2.0 2.1 sum of bl and br ## R code 5.34 m5.9 <- map( alist( height ~ dnorm( mu, sigma ), mu <- a + bl*leg_left, a ~ dnorm( 10, 100 ), bl ~ dnorm( 2, 10 ), sigma ~ dunif( 0, 10 ) ), data=d ) precis(m5.9) ## Mean StdDev 5.5% 94.5% ## a 1.21 0.25 0.82 1.60 ## bl 1.95 0.06 1.86 2.03 ## sigma 0.55 0.04 0.49 0.62 parameter distributions Plot parameter distributions # extract samples post <- extract.samples( m5.9 ) str(post) ## 'data.frame': 10000 obs. of 3 variables: ## $ a : num 1.007 0.976 1.298 1.163 1.333... 32
## $ bl : num 1.99 1.99 1.91 1.94 1.91... ## $ sigma: num 0.575 0.522 0.529 0.536 0.606... # plot each parameter distribution a.plot <- parameter.dist(parameter = "a", values = post$a) bl.plot <- parameter.dist(parameter = "bl", values = post$bl) sigma.plot <- parameter.dist(parameter = "sigma", values = post$sigma) # display parameter distributions plot_grid(a.plot, bl.plot, sigma.plot, labels=c("a", "bl", "sigma"), ncol = 2, nrow = 2) 33
a 1.5 bl 6 density 1.0 density 4 0.5 2 0.0 0 0.5 1.0 1.5 2.0 a 1.8 1.9 2.0 2.1 bl 0.2 0.2 0.0 0.2 0.4 0.836 1.213 1.614 HPDI mean HPDI 0.0 0.2 0.4 1.854 1.945 2.03 HPDI mean HPDI sigma 10.0 0.75 1.00 1.25 1.50 1.75 a 1.9 2.0 bl 7.5 density 5.0 2.5 0.0 0.4 0.5 0.6 0.7 sigma 0.2 0.0 0.2 0.4 0.493 0.553 0.617 HPDI mean HPDI 0.50 0.55 0.60 sigma 34
milk library(rethinking) library(ggplot2) library(cowplot) theme_set(theme_gray()) data ## R code 5.35 data(milk) d <- milk str(d) ## 'data.frame': 29 obs. of 8 variables: ## $ clade : Factor w/ 4 levels "Ape","New World Monkey",..: 4 4 4 4 4 2 2 2 2 2... ## $ species : Factor w/ 29 levels "A palliata","alouatta seniculus",..: 11 8 9 10 16 2 1 6 28 27 ## $ kcal.per.g : num 0.49 0.51 0.46 0.48 0.6 0.47 0.56 0.89 0.91 0.92... ## $ perc.fat : num 16.6 19.3 14.1 14.9 27.3... ## $ perc.protein : num 15.4 16.9 16.9 13.2 19.5... ## $ perc.lactose : num 68 63.8 69 71.9 53.2... ## $ mass : num 1.95 2.09 2.51 1.62 2.19 5.25 5.37 2.51 0.71 0.68... ## $ neocortex.perc: num 55.2 NA NA NA NA... exploratory data analysis Two predictor variables are associated. ggplot(d, aes(perc.fat, kcal.per.g)) + geom_point(shape = 20, color = "darkred") + geom_smooth() 35
1.0 0.8 kcal.per.g 0.6 0.4 10 20 30 40 50 perc.fat ggplot(d, aes(perc.lactose, kcal.per.g)) + geom_point(shape = 20, color = "darkred") + geom_smooth() 36
1.0 0.8 kcal.per.g 0.6 0.4 30 40 50 60 70 perc.lactose ggplot(d, aes(perc.fat, perc.lactose)) + geom_point(shape = 20, color = "darkred") + geom_smooth() 37
80 perc.lactose 60 40 10 20 30 40 50 perc.fat model $E_i \sim \mathrm{Normal}(\mu_i, \sigma)$, $\mu_i = \alpha + \beta_f \, \mathrm{perc.fat} + \beta_l \, \mathrm{perc.lactose}$, $\alpha \sim \mathrm{Normal}(0.6, 10)$, $\beta_f \sim \mathrm{Normal}(0, 1)$, $\beta_l \sim \mathrm{Normal}(0, 1)$, $\sigma \sim \mathrm{Uniform}(0, 10)$ map ## R code 5.36 # kcal.per.g regressed on perc.fat m5.10 <- map( alist( kcal.per.g ~ dnorm( mu, sigma ), mu <- a + bf*perc.fat, a ~ dnorm( 0.6, 10 ), bf ~ dnorm( 0, 1 ), sigma ~ dunif( 0, 10 ) ), data=d ) precis( m5.10, digits=3 ) 38
## Mean StdDev 5.5% 94.5% ## a 0.301 0.036 0.244 0.358 ## bf 0.010 0.001 0.008 0.012 ## sigma 0.073 0.010 0.058 0.089 plot(precis( m5.10, digits=3 )) a bf sigma 0.00 0.05 0.10 0.15 0.20 0.25 0.30 0.35 Value parameter distributions plot parameter distribution # extract samples post <- extract.samples( m5.10 ) str(post) ## 'data.frame': 10000 obs. of 3 variables: ## $ a : num 0.292 0.365 0.313 0.325 0.281... ## $ bf : num 0.01085 0.00824 0.01021 0.0092 0.01026... ## $ sigma: num 0.0793 0.0615 0.0701 0.0426 0.0647... # plot parameter distribution bf.plot <- parameter.dist(parameter = "bf", values = post$bf) bf.plot 39
400 300 density 200 100 0 0.008 0.010 0.012 0.014 bf 0.2 0.0 0.2 0.4 0.009 0.01 0.012 HPDI mean HPDI 0.008 0.009 0.010 0.011 0.012 bf map Start value (1) # kcal.per.g regressed on perc.lactose m5.11 <- map( alist( kcal.per.g ~ dnorm( mu, sigma ), mu <- a + bl*perc.lactose, a ~ dnorm( 0.6, 10 ), bl ~ dnorm( 0, 1 ), sigma ~ dunif( 0, 10 ) ), data=d ) precis( m5.11, digits=3 ) ## Mean StdDev 5.5% 94.5% ## a 1.166 0.043 1.098 1.235 ## bl -0.011 0.001-0.012-0.009 ## sigma 0.062 0.008 0.049 0.075 plot(precis( m5.11, digits=3 )) 40
a bl sigma 0.0 0.2 0.4 0.6 0.8 1.0 1.2 Value plot parameter distribution # extract samples post <- extract.samples( m5.11 ) str(post) ## 'data.frame': 10000 obs. of 3 variables: ## $ a : num 1.26 1.21 1.21 1.13 1.15... ## $ bl : num -0.0123-0.0113-0.0113-0.01-0.0103... ## $ sigma: num 0.0669 0.0668 0.0521 0.0485 0.0764... # plot parameter distribution bl.plot <- parameter.dist(parameter = "bl", values = post$bl) bl.plot 41
500 400 density 300 200 100 0 0.014 0.012 0.010 0.008 bl 0.2 0.0 0.2 0.4 0.012 0.011 0.009 HPDI mean HPDI 0.012 0.011 0.010 0.009 bl map ## R code 5.37 m5.12 <- map( alist( kcal.per.g ~ dnorm( mu, sigma ), mu <- a + bf*perc.fat + bl*perc.lactose, a ~ dnorm( 0.6, 10 ), bf ~ dnorm( 0, 1 ), bl ~ dnorm( 0, 1 ), sigma ~ dunif( 0, 10 ) ), data=d ) precis( m5.12, digits=3 ) ## Mean StdDev 5.5% 94.5% ## a 1.007 0.200 0.688 1.327 ## bf 0.002 0.002-0.002 0.006 ## bl -0.009 0.002-0.013-0.005 ## sigma 0.061 0.008 0.048 0.074 plot(precis( m5.12, digits=3 )) 42
a bf bl sigma 0.0 0.2 0.4 0.6 0.8 1.0 1.2 Value plot parameter distributions # extract samples post <- extract.samples( m5.12 ) str(post) ## 'data.frame': 10000 obs. of 4 variables: ## $ a : num 0.728 1.172 0.832 0.936 0.807... ## $ bf : num 0.0051-0.00033 0.00373 0.00342 0.00371... ## $ bl : num -0.00524-0.01077-0.00671-0.00791-0.00606... ## $ sigma: num 0.0473 0.057 0.0594 0.0643 0.0709... # plot each parameter distribution a.plot <- parameter.dist(parameter = "a", values = post$a) bf.plot <- parameter.dist(parameter = "bf", values = post$bf) bl.plot <- parameter.dist(parameter = "bl", values = post$bl) sigma.plot <- parameter.dist(parameter = "sigma", values = post$sigma) # display parameter distributions plot_grid(a.plot, bf.plot, bl.plot, sigma.plot, labels=c("a", "bf", "bl", "sigma"), ncol = 2, nrow = 2) 43
a 2.0 bf 150 1.5 density 1.0 density 100 0.5 50 0.0 0 0.5 1.0 1.5 a 0.005 0.000 0.005 0.010 bf 0.2 0.2 0.0 0.2 0.4 0.684 1.01 1.318 HPDI mean HPDI 0.0 0.2 0.4 0.002 0.002 0.006 HPDI mean HPDI bl 150 0.75 1.00 1.25 a 0.0025 0.0000 0.0025 0.0050 bf sigma 50 40 density 100 density 30 20 50 10 0 0 0.015 0.010 0.005 0.000 bl 0.04 0.06 0.08 sigma 0.2 0.2 0.0 0.2 0.4 0.012 0.009 0.005 HPDI mean HPDI 0.0 0.2 0.4 0.048 0.061 0.073 HPDI mean HPDI 0.014 0.012 0.010 0.008 0.006 0.004 bl 0.05 0.06 0.07 sigma 44
correlated predictor variables perc.fat and perc.lactose are inversely related ## R code 5.38 pairs( ~ kcal.per.g + perc.fat + perc.lactose, data=d, col=rangi2 ) 10 20 30 40 50 kcal.per.g 0.5 0.7 0.9 10 30 50 perc.fat perc.lactose 30 50 70 0.5 0.6 0.7 0.8 0.9 30 40 50 60 70 ## R code 5.39 cor( d$perc.fat, d$perc.lactose ) ## [1] -0.9416373 Simulation ## R code 5.40 data(milk) d <- milk sim.coll <- function( r=0.9 ) { d$x <- rnorm( nrow(d), mean=r*d$perc.fat, sd=sqrt( (1-r^2)*var(d$perc.fat) ) ) m <- lm( kcal.per.g ~ perc.fat + x, data=d ) sqrt( diag( vcov(m) ) )[2] # stddev of parameter } rep.sim.coll <- function( r=0.9, n=100 ) { stddev <- replicate( n, sim.coll(r) ) mean(stddev) } r.seq <- seq(from=0,to=0.99,by=0.01) stddev <- sapply( r.seq, function(z) rep.sim.coll(r=z,n=100) ) plot( stddev ~ r.seq, type="l", col=rangi2, lwd=2, xlab="correlation" ) 45
stddev 0.001 0.003 0.005 0.007 0.0 0.2 0.4 0.6 0.8 1.0 correlation plants Post-treatment bias library(rethinking) library(ggplot2) library(cowplot) theme_set(theme_gray()) data ## R code 5.41 # number of plants N <- 100 # simulate initial heights h0 <- rnorm(N,10,2) # assign treatments and simulate fungus and growth treatment <- rep( 0:1, each=N/2 ) fungus <- rbinom( N, size=1, prob=0.5 - treatment*0.4 ) h1 <- h0 + rnorm(N, 5-3*fungus) # compose a clean data frame d <- data.frame( h0=h0, h1=h1, treatment=treatment, fungus=fungus ) 46
exploratory data analysis d2 <- d d2$treatment <- factor(d2$treatment) str(d2) ## 'data.frame': 100 obs. of 4 variables: ## $ h0 : num 8.92 9.71 13.14 12.64 9.29... ## $ h1 : num 10.5 15.7 16.1 17.9 14.2... ## $ treatment: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1... ## $ fungus : int 1 0 1 0 0 0 1 0 1 0... pairs(d2, col="darkred") 10 12 14 16 18 0.0 0.4 0.8 h0 6 10 14 10 14 18 h1 treatment 1.0 1.4 1.8 0.0 0.4 0.8 fungus 6 8 10 12 14 ggplot(d2, aes(h0, h1, color = treatment)) + geom_point(shape = 20) + geom_smooth(method = "lm") 1.0 1.4 1.8 47
17.5 h1 15.0 12.5 treatment 0 1 10.0 7.5 5.0 7.5 10.0 12.5 h0 map ## R code 5.42 m5.13 <- map( alist( h1 ~ dnorm(mu,sigma), mu <- a + bh*h0 + bt*treatment + bf*fungus, a ~ dnorm(0,100), c(bh,bt,bf) ~ dnorm(0,10), sigma ~ dunif(0,10) ), data=d, start = list(a = 0)) precis(m5.13) ## Mean StdDev 5.5% 94.5% ## a 4.78 0.52 3.96 5.61 ## bh 1.05 0.05 0.97 1.13 ## bt -0.30 0.19-0.60 0.01 ## bf -3.30 0.22-3.65-2.96 ## sigma 0.86 0.06 0.76 0.96 48
# parameter distributions ----
# plot parameter distributions

# extract samples
post <- extract.samples(m5.13)
str(post)
## 'data.frame': 10000 obs. of 5 variables:
## $ a : num 4.57 5.58 3.94 4.54 4.8...
## $ bh : num 1.04 0.976 1.131 1.083 1.058...
## $ bt : num -0.142 -0.285 -0.273 -0.415 -0.251...
## $ bf : num -2.86 -3.1 -3.16 -3.37 -3.33...
## $ sigma: num 1.023 0.907 0.767 0.979 0.887...

# plot each parameter distribution
bh.plot <- parameter.dist(parameter = "bh", values = post$bh)
bt.plot <- parameter.dist(parameter = "bt", values = post$bt)

# display parameter distributions
plot_grid(bh.plot, bt.plot, labels = c("bh", "bt"), ncol = 2, nrow = 1)
# [figure residue] bh 8 bt 2.0 6 1.5 density 4 density 1.0 2 0.5 0 0.0 0.9 1.0 1.1 1.2 bh 1.0 0.5 0.0 bt 0.2 0.2 0.0 0.2 0.4 0.977 1.052 1.131 HPDI mean HPDI 0.0 0.2 0.4 0.615 0.298 0.008 HPDI mean HPDI 0.95 1.00 1.05 1.10 1.15 bh 0.75 0.50 0.25 0.00 bt

# map ----

## R code 5.43
# Same model as m5.13 but without the fungus predictor.
m5.14 <- map(
  alist(
    h1 ~ dnorm(mu, sigma),
    # -- page 49 --
    mu <- a + bh * h0 + bt * treatment,
    a ~ dnorm(0, 100),
    c(bh, bt) ~ dnorm(0, 10),
    sigma ~ dunif(0, 10)
  ),
  data = d,
  start = list(a = 0)
)
precis(m5.14)
## Mean StdDev 5.5% 94.5%
## a 4.23 0.94 2.73 5.74
## bh 0.96 0.09 0.82 1.10
## bt 0.92 0.32 0.42 1.43
## sigma 1.57 0.11 1.39 1.75

# plot parameter distributions

# extract samples
post <- extract.samples(m5.14)
str(post)
## 'data.frame': 10000 obs. of 4 variables:
## $ a : num 3.91 6.62 4.32 3.62 6.05...
## $ bh : num 0.975 0.749 0.959 1.003 0.8...
## $ bt : num 0.93 1.109 1.078 1.219 0.333...
## $ sigma: num 1.55 1.67 1.54 1.49 1.59...

# plot each parameter distribution
a.plot <- parameter.dist(parameter = "a", values = post$a)
bh.plot <- parameter.dist(parameter = "bh", values = post$bh)
bt.plot <- parameter.dist(parameter = "bt", values = post$bt)
sigma.plot <- parameter.dist(parameter = "sigma", values = post$sigma)

# display parameter distributions
plot_grid(
  a.plot, bh.plot, bt.plot, sigma.plot,
  labels = c("a", "bh", "bt", "sigma"),
  ncol = 2,
  nrow = 2
)

# -- page 50 --
a 0.4 bh 4 0.3 3 density 0.2 density 2 0.1 1 0.0 0 0 2 4 6 8 a 0.8 1.0 1.2 bh 0.2 0.2 0.0 0.2 0.4 2.767 4.239 5.775 HPDI mean HPDI 0.0 0.2 0.4 0.827 0.959 1.105 HPDI mean HPDI bt 3 4 5 6 a sigma 0.8 0.9 1.0 1.1 bh 1.0 3 density 0.5 density 2 1 0.0 0 0.0 0.5 1.0 1.5 2.0 bt 1.3 1.5 1.7 1.9 2.1 sigma 0.2 0.2 0.0 0.2 0.4 0.417 0.919 1.436 HPDI mean HPDI 0.0 0.2 0.4 1.394 1.57 1.749 HPDI mean HPDI 0.4 0.8 1.2 1.6 bt 1.4 1.5 1.6 1.7 1.8 sigma 51
# Dobe !Kung ----

library(rethinking)
library(ggplot2)
library(cowplot)
theme_set(theme_gray())

# data ----

## R code 5.44
# The dataset name is case-sensitive: it must match the `Howell1` object
# that is assigned to `d` on the next line.
data(Howell1)
d <- Howell1
str(d)
## 'data.frame': 544 obs. of 4 variables:
## $ height: num 152 140 137 157 145...
## $ weight: num 47.8 36.5 31.9 53 41.3...
## $ age : num 63 63 65 41 51 35 32 27 19 54...
## $ male : int 1 0 0 1 0 1 0 1 0 1...

# exploratory data analysis ----

# Plotting copy with a labelled factor; `d` keeps the 0/1 `male` indicator
# that the models below use.
d2 <- d
d2$gender <- factor(d2$male, labels = c("female", "male"))
str(d2)
## 'data.frame': 544 obs. of 5 variables:
## $ height: num 152 140 137 157 145...
## $ weight: num 47.8 36.5 31.9 53 41.3...
## $ age : num 63 63 65 41 51 35 32 27 19 54...
## $ male : int 1 0 0 1 0 1 0 1 0 1...
## $ gender: Factor w/ 2 levels "female","male": 2 1 1 2 1 2 1 2 1 2...

ggplot(d2, aes(weight, height, color = gender)) +
  geom_point(shape = 20) +
  scale_color_manual(values = c("skyblue", "sienna"))

# -- page 52 --
# [figure residue] 150 height gender female male 100 50 20 40 60 weight

# model ----
# $h_i \sim \mathrm{Normal}(\mu_i, \sigma)$
# $\mu_i = \alpha + \beta_m m_i$
# $\alpha \sim \mathrm{Normal}(178, 100)$
# $\beta_m \sim \mathrm{Normal}(0, 10)$
# $\sigma \sim \mathrm{Uniform}(0, 50)$

# map ----

## R code 5.45
# `a` is the mean height when male == 0; `bm` is the male - female difference.
m5.15 <- map(
  alist(
    height ~ dnorm(mu, sigma),
    mu <- a + bm * male,
    a ~ dnorm(178, 100),
    bm ~ dnorm(0, 10),
    sigma ~ dunif(0, 50)
  ),
  data = d,
  start = list(a = mean(d$height))
)
precis(m5.15)

# -- page 53 --
## Mean StdDev 5.5% 94.5%
## a 134.83 1.59 132.29 137.37
## bm 7.28 2.28 3.63 10.93
## sigma 27.31 0.83 25.99 28.63

# PI for male height ----

## R code 5.46
# The male mean is a + bm, so its interval is computed from the summed samples.
post <- extract.samples(m5.15)
mu.male <- post$a + post$bm
PI(mu.male)
## 5% 94%
## 139.4423 144.8321

# reparametrize the model ----

# model
# $h_i \sim \mathrm{Normal}(\mu_i, \sigma)$
# $\mu_i = \beta_f f_i + \beta_m m_i$
# $\beta_f \sim \mathrm{Normal}(178, 100)$
# $\beta_m \sim \mathrm{Normal}(178, 100)$
# $\sigma \sim \mathrm{Uniform}(0, 50)$

# map

## R code 5.47
# One intercept per sex: `af` applies when male == 0, `am` when male == 1.
m5.15b <- map(
  alist(
    height ~ dnorm(mu, sigma),
    mu <- af * (1 - male) + am * male,
    af ~ dnorm(178, 100),
    am ~ dnorm(178, 100),
    sigma ~ dunif(0, 50)
  ),
  data = d,
  start = list(af = mean(d$height), am = mean(d$height))
)
precis(m5.15b)
## Mean StdDev 5.5% 94.5%
## af 134.64 1.61 132.06 137.22
## am 142.33 1.70 139.61 145.05
## sigma 27.31 0.83 25.98 28.63

# -- page 54 --
# parameter distributions ----

# extract samples
post <- extract.samples(m5.15b)
str(post)
## 'data.frame': 10000 obs. of 3 variables:
## $ af : num 132 137 135 132 132...
## $ am : num 142 141 141 143 143...
## $ sigma: num 27 27 27.8 25.4 27.6...

# plot each parameter distribution
af.plot <- parameter.dist(parameter = "af", values = post$af)
am.plot <- parameter.dist(parameter = "am", values = post$am)

# display parameter distributions
plot_grid(af.plot, am.plot, labels = c("af", "am"), ncol = 2, nrow = 1)
# [figure residue] af 0.25 am 0.20 0.20 density 0.15 0.10 density 0.15 0.10 0.05 0.05 0.00 0.00 128 132 136 140 af 140 145 150 am 0.2 0.2 0.0 0.2 0.4 132.097 134.626 137.194 HPDI mean HPDI 0.0 0.2 0.4 139.717 142.329 145.143 HPDI mean HPDI 132 134 136 138 af 140 142 144 146 am

# milk ----

# data

## R code 5.48
data(milk)
d <- milk
unique(d$clade)
## [1] Strepsirrhine New World Monkey Old World Monkey Ape

# -- page 55 --
## Levels: Ape New World Monkey Old World Monkey Strepsirrhine

## R code 5.49
# Indicator (dummy) variables for three of the four clades; Ape is the
# reference category absorbed by the intercept.
# The comparison strings must match the factor levels exactly, including
# capitalisation, otherwise every indicator is 0.
(d$clade.nwm <- ifelse(d$clade == "New World Monkey", 1, 0))
## [1] 0 0 0 0 0 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

## R code 5.50
d$clade.owm <- ifelse(d$clade == "Old World Monkey", 1, 0)
d$clade.s <- ifelse(d$clade == "Strepsirrhine", 1, 0)

# map ----

## R code 5.51
m5.16 <- map(
  alist(
    kcal.per.g ~ dnorm(mu, sigma),
    mu <- a + b.nwm * clade.nwm + b.owm * clade.owm + b.s * clade.s,
    a ~ dnorm(0.6, 10),
    b.nwm ~ dnorm(0, 1),
    b.owm ~ dnorm(0, 1),
    b.s ~ dnorm(0, 1),
    sigma ~ dunif(0, 10)
  ),
  data = d
)
precis(m5.16)
## Mean StdDev 5.5% 94.5%
## a 0.55 0.04 0.49 0.61
## b.nwm 0.17 0.05 0.08 0.25
## b.owm 0.24 0.06 0.15 0.34
## b.s -0.04 0.06 -0.14 0.06
## sigma 0.11 0.02 0.09 0.14

# analysis ----

## R code 5.52
# sample posterior
post <- extract.samples(m5.16)

# compute averages for each category
# (intercept alone for Ape, intercept plus the clade offset otherwise)
mu.ape <- post$a
mu.nwm <- post$a + post$b.nwm
mu.owm <- post$a + post$b.owm
mu.s <- post$a + post$b.s

# summarize using precis
precis(data.frame(mu.ape, mu.nwm, mu.owm, mu.s))
## Mean StdDev 0.89 0.89
## mu.ape 0.55 0.04 0.49 0.61
## mu.nwm 0.71 0.04 0.65 0.77
## mu.owm 0.79 0.05 0.72 0.86

# -- page 56 --
## mu.s 0.51 0.05 0.42 0.59

## R code 5.53
# posterior difference between the New World and Old World monkey means
diff.nwm.owm <- mu.nwm - mu.owm
quantile(diff.nwm.owm, probs = c(0.025, 0.5, 0.975))
## 2.5% 50% 97.5%
## -0.19156945 -0.07336990 0.04171098

# unique intercepts ----

## R code 5.54
# integer index per clade, used below to select one intercept per clade
(d$clade_id <- coerce_index(d$clade))
## [1] 4 4 4 4 4 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 1 1 1 1 1 1 1 1 1

## R code 5.55
m5.16_alt <- map(
  alist(
    kcal.per.g ~ dnorm(mu, sigma),
    mu <- a[clade_id],
    a[clade_id] ~ dnorm(0.6, 10),
    sigma ~ dunif(0, 10)
  ),
  data = d
)
precis(m5.16_alt, depth = 2)
## Mean StdDev 5.5% 94.5%
## a[1] 0.55 0.04 0.48 0.61
## a[2] 0.71 0.04 0.65 0.78
## a[3] 0.79 0.05 0.71 0.86
## a[4] 0.51 0.05 0.43 0.59
## sigma 0.11 0.02 0.09 0.14

## R code 5.55
# NOTE(review): this chunk repeats R code 5.55 verbatim, refitting the
# same model and overwriting `m5.16_alt`.
m5.16_alt <- map(
  alist(
    kcal.per.g ~ dnorm(mu, sigma),
    mu <- a[clade_id],
    a[clade_id] ~ dnorm(0.6, 10),
    sigma ~ dunif(0, 10)
  ),
  data = d
)
precis(m5.16_alt, depth = 2)
## Mean StdDev 5.5% 94.5%
## a[1] 0.55 0.04 0.48 0.61
## a[2] 0.71 0.04 0.65 0.78
## a[3] 0.79 0.05 0.71 0.86
## a[4] 0.51 0.05 0.43 0.59
## sigma 0.11 0.02 0.09 0.14

# -- page 57 --
# lm ----

# The lm() examples below are kept commented out: they are formula
# templates that use placeholder columns (x, y, z, w, season).

# ## R code 5.56
# m5.17 <- lm( y ~ 1 + x, data=d )
# m5.18 <- lm( y ~ 1 + x + z + w, data=d )
#
# ## R code 5.57
# m5.17 <- lm( y ~ 1 + x, data=d )
# m5.19 <- lm( y ~ x, data=d )
#
# ## R code 5.58
# m5.20 <- lm( y ~ 0 + x, data=d )
# m5.21 <- lm( y ~ x - 1, data=d )
#
# ## R code 5.59
# m5.22 <- lm( y ~ 1 + as.factor(season), data=d )
#
# ## R code 5.60
# d$x2 <- d$x^2
# d$x3 <- d$x^3
# m5.23 <- lm( y ~ 1 + x + x2 + x3, data=d )
#
# ## R code 5.61
# m5.24 <- lm( y ~ 1 + x + I(x^2) + I(x^3), data=d )

## R code 5.62
# glimmer() prints the map-style formula list shown in the output below
data(cars)
glimmer(dist ~ speed, data = cars)
## alist(
## dist ~ dnorm( mu, sigma ),
## mu <- Intercept +
## b_speed*speed,
## Intercept ~ dnorm(0,10),
## b_speed ~ dnorm(0,10),
## sigma ~ dcauchy(0,2)
## )

# -- page 58 --