library(tidyverse)
library(brms)
options(brms.backend = "cmdstanr", mc.cores = 2)
Exercise 11
Instruction
!! Remember to set eval: true
in YAML.
Answer questions 1-4 below. Submit the rendered file (PDF/WORD) to Brightspace.
When estimating [an] . . . effect under a cluster design, it is not enough to fit a multilevel model with varying intercepts. The slopes . . . must also be allowed to vary . . . (Gelman & Brown, 2024, How Statistical Challenges and Misreadings of the Literature Combine to Produce Unreplicable Science: An Example From Psychology)
# Function to simulate data
<- function(n_subjects = 18, n_days = 10, beta0 = 250, beta1 = 0,
simulate_data sd_intercept = 25, sd_slope = 10, sd_residual = 25) {
<- rep(1:n_subjects, each = n_days)
subject <- rep(0:(n_days - 1), times = n_subjects)
days
# Random effects
<- rnorm(n_subjects, mean = 0, sd = sd_intercept)
random_intercepts <- rnorm(n_subjects, mean = 0, sd = sd_slope)
random_slopes
# Generate reaction times
<- beta0 + beta1 * days +
reaction +
random_intercepts[subject] * days +
random_slopes[subject] rnorm(n_subjects * n_days, mean = 0, sd = sd_residual)
data.frame(Subject = factor(subject), Days = days, Reaction = reaction)
}
# Simulate one data set
set.seed(1308)
<- simulate_data()
simulated_data <- brm(Reaction ~ Days + (1 | Subject), data = simulated_data,
m_ri iter = 1000, chains = 2,
control = list(adapt_delta = .95))
<- brm(Reaction ~ Days + (Days | Subject), data = simulated_data,
m_rs iter = 1000, chains = 2,
control = list(adapt_delta = .95))
Q1: Among the two models above, which model is better based on LOOIC?
Q2: Examine the posterior distributions for the fixed effect of Days
(i.e., average slope across participants). How are they different in the two models?
Q3: The following code implements a simulation examining the effect of ignoring the random slopes on Type I error rates. How much is that inflated? This will take a few minutes. [Note: I use lme4::lmer()
for frequentist analyses to save time.]
library(lme4)
library(lmerTest)
# Function to perform analysis and check Type I error
<- function(form = Reaction ~ Days + (1 | Subject),
check_type1_error n_simulations = 1000, alpha = 0.05) {
<- numeric(n_simulations)
p_values
for (i in 1:n_simulations) {
<- simulate_data()
data
# Fit model ignoring random slope
<- lmer(form, data = data)
model
# Extract p-value for fixed effect of Days
<- summary(model)$coefficients[2, "Pr(>|t|)"]
p_values[i]
}
# Calculate Type I error rate
<- mean(p_values < alpha)
type1_error_rate return(type1_error_rate)
}
# Run the simulation and check Type I error rate
set.seed(123)
<- check_type1_error()
type1_error_rate_ri print(paste("Type I error rate with random intercepts:", type1_error_rate_ri))
Q4: Repeat Q3, but change the model to one including the random slopes for Days
.
# Run the simulation and check Type I error rate