library(tidyverse)
library(brms)
options(brms.backend = "cmdstanr", mc.cores = 2)Exercise 11
Instruction
!! Remember to set eval: true in YAML.
Answer questions 1-4 below. Submit the rendered file (PDF/WORD) to Brightspace.
When estimating [an] . . . effect under a cluster design, it is not enough to fit a multilevel model with varying intercepts. The slopes . . . must also be allowed to vary . . . (Gelman & Brown, 2024, How Statistical Challenges and Misreadings of the Literature Combine to Produce Unreplicable Science: An Example From Psychology)
# Function to simulate data
simulate_data <- function(n_subjects = 18, n_days = 10, beta0 = 250, beta1 = 0,
sd_intercept = 25, sd_slope = 10, sd_residual = 25) {
subject <- rep(1:n_subjects, each = n_days)
days <- rep(0:(n_days - 1), times = n_subjects)
# Random effects
random_intercepts <- rnorm(n_subjects, mean = 0, sd = sd_intercept)
random_slopes <- rnorm(n_subjects, mean = 0, sd = sd_slope)
# Generate reaction times
reaction <- beta0 + beta1 * days +
random_intercepts[subject] +
random_slopes[subject] * days +
rnorm(n_subjects * n_days, mean = 0, sd = sd_residual)
data.frame(Subject = factor(subject), Days = days, Reaction = reaction)
}# Simulate one data set
set.seed(1308)
simulated_data <- simulate_data()
m_ri <- brm(Reaction ~ Days + (1 | Subject), data = simulated_data,
iter = 1000, chains = 2,
control = list(adapt_delta = .95))
m_rs <- brm(Reaction ~ Days + (Days | Subject), data = simulated_data,
iter = 1000, chains = 2,
control = list(adapt_delta = .95))Q1: Among the two models above, which model is better based on LOOIC?
Q2: Examine the posterior distributions for the fixed effect of Days (i.e., average slope across participants). How are they different in the two models?
Q3: The following code implements a simulation examining the effect of ignoring the random slopes on Type I error rates. How much is that inflated? This will take a few minutes. [Note: I use lme4::lmer() for frequentist analyses to save time.]
library(lme4)
library(lmerTest)
# Function to perform analysis and check Type I error
check_type1_error <- function(form = Reaction ~ Days + (1 | Subject),
n_simulations = 1000, alpha = 0.05) {
p_values <- numeric(n_simulations)
for (i in 1:n_simulations) {
data <- simulate_data()
# Fit model ignoring random slope
model <- lmer(form, data = data)
# Extract p-value for fixed effect of Days
p_values[i] <- summary(model)$coefficients[2, "Pr(>|t|)"]
}
# Calculate Type I error rate
type1_error_rate <- mean(p_values < alpha)
return(type1_error_rate)
}
# Run the simulation and check Type I error rate
set.seed(123)
type1_error_rate_ri <- check_type1_error()
print(paste("Type I error rate with random intercepts:", type1_error_rate_ri))Q4: Repeat Q3, but change the model to one including the random slopes for Days.
# Run the simulation and check Type I error rate