library(tidyverse)
library(gtsummary)
library(dfmtbx)
library(lme4)Mixed-effect regression models for binary outcomes
Dataset description
# Any value above 4 is set to 1, else set to 0.# Remove missing values
data <- data %>%
filter(imps79 != -9) #%>%
# mutate(subejct = factor(subject))data %>%
group_by(week, tx) %>%
count()# A tibble: 14 × 3
# Groups: week, tx [14]
week tx n
<dbl> <dbl> <int>
1 0 0 107
2 0 1 327
3 1 0 105
4 1 1 321
5 2 0 5
6 2 1 9
7 3 0 87
8 3 1 287
9 4 0 2
10 4 1 9
11 5 0 2
12 5 1 7
13 6 0 70
14 6 1 265
Fixed-effects logistic regression model
# Fixed effects model only, ignores clustering from subjects
model_9.3 <- glm(imps79b ~ tx + sweek + tx:sweek,
data = data,
family = binomial(link = "logit"))
broom.mixed::tidy(model_9.3)# A tibble: 4 × 5
term estimate std.error statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) 3.70 0.441 8.39 4.70e-17
2 tx -0.405 0.483 -0.839 4.01e- 1
3 sweek -1.11 0.233 -4.79 1.71e- 6
4 tx:sweek -0.418 0.256 -1.63 1.02e- 1
broom.mixed::glance(model_9.3)# A tibble: 1 × 8
null.deviance df.null logLik AIC BIC deviance df.residual nobs
<dbl> <int> <dbl> <dbl> <dbl> <dbl> <int> <int>
1 1737. 1602 -681. 1370. 1392. 1362. 1599 1603
Random intercept logistic regression model
# glmer() default is laplace, analogous to METHOD = LAPLACE in PROC GLIMMIX. Set nAGQ to 15 to more closely resemble results METHOD = QUAD
# Fit the generalized linear mixed model
model_9.4 <- glmer(imps79b ~ tx * sweek + (1 | subject),
data = data,
family = binomial(link = "logit"),
nAGQ = 15)
broom.mixed::tidy(model_9.4)# A tibble: 5 × 7
effect group term estimate std.error statistic p.value
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 fixed <NA> (Intercept) 5.39 0.631 8.54 1.33e-17
2 fixed <NA> tx -0.0249 0.653 -0.0381 9.70e- 1
3 fixed <NA> sweek -1.50 0.291 -5.16 2.47e- 7
4 fixed <NA> tx:sweek -1.01 0.334 -3.04 2.37e- 3
5 ran_pars subject sd__(Intercept) 2.12 NA NA NA
broom.mixed::glance(model_9.4)# A tibble: 1 × 7
nobs sigma logLik AIC BIC deviance df.residual
<int> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
1 1603 1 -625. 1260. 1287. 674. 1598
Random intercept and trend logistic regression model
library(glmmTMB)
data <- data %>%
mutate(
sweek_c = sweek - mean(sweek, na.rm = TRUE),
tx = factor(tx))model <- glmmTMB(imps79b ~ tx + sweek + tx:sweek + (1 + sweek | subject),
data = data,
family = binomial(link = "logit"))
summary(model) Family: binomial ( logit )
Formula: imps79b ~ tx + sweek + tx:sweek + (1 + sweek | subject)
Data: data
AIC BIC logLik -2*log(L) df.resid
1173.7 1211.4 -579.9 1159.7 1596
Random effects:
Conditional model:
Groups Name Variance Std.Dev. Corr
subject (Intercept) 2184.9 46.74
sweek 365.9 19.13 -0.81
Number of obs: 1603, groups: subject, 437
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 45.2732 7.6836 5.892 3.81e-09 ***
tx1 0.8314 6.9096 0.120 0.9042
sweek -14.6736 3.1210 -4.702 2.58e-06 ***
tx1:sweek -7.3960 2.9027 -2.548 0.0108 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Fit the random trend logistic regression model
model_9.6 <- glm(imps79b ~ tx + sweek + tx:sweek,
data = data,
family = binomial(link = "logit")
)
broom.mixed::tidy(model_9.6)# A tibble: 4 × 5
term estimate std.error statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) 3.70 0.441 8.39 4.70e-17
2 tx1 -0.405 0.483 -0.839 4.01e- 1
3 sweek -1.11 0.233 -4.79 1.71e- 6
4 tx1:sweek -0.418 0.256 -1.63 1.02e- 1
broom.mixed::glance(model_9.6)# A tibble: 1 × 8
null.deviance df.null logLik AIC BIC deviance df.residual nobs
<dbl> <int> <dbl> <dbl> <dbl> <dbl> <int> <int>
1 1737. 1602 -681. 1370. 1392. 1362. 1599 1603
set.seed(42)
# Parameters
n_subjects <- 100
n_timepoints <- 6
subject <- rep(1:n_subjects, each = n_timepoints)
time <- rep(1:n_timepoints, times = n_subjects)
# Fixed effect predictor
tx <- rbinom(n_subjects, 1, 0.5)
tx_long <- rep(tx, each = n_timepoints)
# Random effects
intercepts <- rnorm(n_subjects, 0, 1)
slopes <- rnorm(n_subjects, 0.5, 0.2)
# Linear predictor
eta <- intercepts[subject] + slopes[subject] * time + 0.8 * tx_long
prob <- plogis(eta)
y <- rbinom(n_subjects * n_timepoints, 1, prob)
# Final dataset
long_data <- data.frame(
y = y,
time = time,
tx = tx_long,
subject = factor(subject)
)
#
long_data <-
long_data %>%
mutate(sqrt_time = sqrt(time))
write_csv(long_data,
file = "C:\\Users\\rodrica2\\OneDrive\\Web Site\\quarto_website\\personal-site\\stats\\glmer\\data\\long_data.csv")# Fit the random trend logistic regression model, adding random trends seems to really throw off results in these kinds of models when compared to SAS.
model_test <- glmer(y ~ tx + sqrt_time + tx:sqrt_time + (1 + sqrt_time | subject),
data = long_data,
family = binomial(link = "logit"),
control = glmerControl(
optimizer = "optimx",
optCtrl = list(method='nlminb'))
) Loading required namespace: optimx
broom.mixed::tidy(model_test)# A tibble: 7 × 7
effect group term estimate std.error statistic p.value
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 fixed <NA> (Intercept) -2.81 1.28 -2.20 2.78e-2
2 fixed <NA> tx 4.49 1.74 2.59 9.70e-3
3 fixed <NA> sqrt_time 3.08 0.768 4.01 6.01e-5
4 fixed <NA> tx:sqrt_time -2.13 0.932 -2.29 2.21e-2
5 ran_pars subject sd__(Intercept) 5.14 NA NA NA
6 ran_pars subject cor__(Intercept).sqrt_… -0.962 NA NA NA
7 ran_pars subject sd__sqrt_time 2.54 NA NA NA
broom.mixed::glance(model_test)# A tibble: 1 × 7
nobs sigma logLik AIC BIC deviance df.residual
<int> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
1 600 1 -194. 402. 433. 220. 593