Mixed-effect regression models for binary outcomes

Dataset description

library(tidyverse)
library(gtsummary)
library(dfmtbx)
library(lme4)
# Any value above 4 is set to 1, else set to 0.
# Remove missing values
data <- data %>%
  filter(imps79 != -9) #%>%
  # mutate(subejct = factor(subject))
data %>%
  group_by(week, tx) %>%
  count()
# A tibble: 14 × 3
# Groups:   week, tx [14]
    week    tx     n
   <dbl> <dbl> <int>
 1     0     0   107
 2     0     1   327
 3     1     0   105
 4     1     1   321
 5     2     0     5
 6     2     1     9
 7     3     0    87
 8     3     1   287
 9     4     0     2
10     4     1     9
11     5     0     2
12     5     1     7
13     6     0    70
14     6     1   265

Fixed-effects logistic regression model

# Fixed effects model only, ignores clustering from subjects
model_9.3 <- glm(imps79b ~ tx + sweek + tx:sweek,
                 data = data,
                 family = binomial(link = "logit")) 

broom.mixed::tidy(model_9.3)
# A tibble: 4 × 5
  term        estimate std.error statistic  p.value
  <chr>          <dbl>     <dbl>     <dbl>    <dbl>
1 (Intercept)    3.70      0.441     8.39  4.70e-17
2 tx            -0.405     0.483    -0.839 4.01e- 1
3 sweek         -1.11      0.233    -4.79  1.71e- 6
4 tx:sweek      -0.418     0.256    -1.63  1.02e- 1
broom.mixed::glance(model_9.3)
# A tibble: 1 × 8
  null.deviance df.null logLik   AIC   BIC deviance df.residual  nobs
          <dbl>   <int>  <dbl> <dbl> <dbl>    <dbl>       <int> <int>
1         1737.    1602  -681. 1370. 1392.    1362.        1599  1603

Random intercept logistic regression model

# glmer() default is laplace, analogous to METHOD = LAPLACE in PROC GLIMMIX. Set nAGQ to 15 to more closely resemble results METHOD = QUAD

# Fit the generalized linear mixed model
model_9.4 <- glmer(imps79b ~ tx * sweek + (1 | subject),
               data = data,
               family = binomial(link = "logit"),
               nAGQ = 15)


broom.mixed::tidy(model_9.4)
# A tibble: 5 × 7
  effect   group   term            estimate std.error statistic   p.value
  <chr>    <chr>   <chr>              <dbl>     <dbl>     <dbl>     <dbl>
1 fixed    <NA>    (Intercept)       5.39       0.631    8.54    1.33e-17
2 fixed    <NA>    tx               -0.0249     0.653   -0.0381  9.70e- 1
3 fixed    <NA>    sweek            -1.50       0.291   -5.16    2.47e- 7
4 fixed    <NA>    tx:sweek         -1.01       0.334   -3.04    2.37e- 3
5 ran_pars subject sd__(Intercept)   2.12      NA       NA      NA       
broom.mixed::glance(model_9.4)
# A tibble: 1 × 7
   nobs sigma logLik   AIC   BIC deviance df.residual
  <int> <dbl>  <dbl> <dbl> <dbl>    <dbl>       <int>
1  1603     1  -625. 1260. 1287.     674.        1598

Random intercept and trend logistic regression model

library(glmmTMB)

data <- data %>%
  mutate(
    sweek_c = sweek - mean(sweek, na.rm = TRUE),
    tx = factor(tx))
model <- glmmTMB(imps79b ~ tx + sweek + tx:sweek + (1 + sweek | subject),
                 data = data,
                 family = binomial(link = "logit"))


summary(model)
 Family: binomial  ( logit )
Formula:          imps79b ~ tx + sweek + tx:sweek + (1 + sweek | subject)
Data: data

      AIC       BIC    logLik -2*log(L)  df.resid 
   1173.7    1211.4    -579.9    1159.7      1596 

Random effects:

Conditional model:
 Groups  Name        Variance Std.Dev. Corr  
 subject (Intercept) 2184.9   46.74          
         sweek        365.9   19.13    -0.81 
Number of obs: 1603, groups:  subject, 437

Conditional model:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)  45.2732     7.6836   5.892 3.81e-09 ***
tx1           0.8314     6.9096   0.120   0.9042    
sweek       -14.6736     3.1210  -4.702 2.58e-06 ***
tx1:sweek    -7.3960     2.9027  -2.548   0.0108 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Fit the random trend logistic regression model
model_9.6 <- glm(imps79b ~ tx + sweek + tx:sweek,
               data = data,
               family = binomial(link = "logit")
               ) 

broom.mixed::tidy(model_9.6)
# A tibble: 4 × 5
  term        estimate std.error statistic  p.value
  <chr>          <dbl>     <dbl>     <dbl>    <dbl>
1 (Intercept)    3.70      0.441     8.39  4.70e-17
2 tx1           -0.405     0.483    -0.839 4.01e- 1
3 sweek         -1.11      0.233    -4.79  1.71e- 6
4 tx1:sweek     -0.418     0.256    -1.63  1.02e- 1
broom.mixed::glance(model_9.6)
# A tibble: 1 × 8
  null.deviance df.null logLik   AIC   BIC deviance df.residual  nobs
          <dbl>   <int>  <dbl> <dbl> <dbl>    <dbl>       <int> <int>
1         1737.    1602  -681. 1370. 1392.    1362.        1599  1603
set.seed(42)

# Parameters
n_subjects <- 100
n_timepoints <- 6
subject <- rep(1:n_subjects, each = n_timepoints)
time <- rep(1:n_timepoints, times = n_subjects)

# Fixed effect predictor
tx <- rbinom(n_subjects, 1, 0.5)
tx_long <- rep(tx, each = n_timepoints)

# Random effects
intercepts <- rnorm(n_subjects, 0, 1)
slopes <- rnorm(n_subjects, 0.5, 0.2)

# Linear predictor
eta <- intercepts[subject] + slopes[subject] * time + 0.8 * tx_long
prob <- plogis(eta)
y <- rbinom(n_subjects * n_timepoints, 1, prob)

# Final dataset
long_data <- data.frame(
  y = y,
  time = time,
  tx = tx_long,
  subject = factor(subject)
)

#
long_data <- 
    long_data %>%
    mutate(sqrt_time = sqrt(time))


write_csv(long_data, 
file = "C:\\Users\\rodrica2\\OneDrive\\Web Site\\quarto_website\\personal-site\\stats\\glmer\\data\\long_data.csv")
# Fit the random trend logistic regression model, adding random trends seems to really throw off results in these kinds of models when compared to SAS.
model_test <- glmer(y ~ tx + sqrt_time + tx:sqrt_time + (1 + sqrt_time | subject),
               data = long_data,
               family = binomial(link = "logit"),
               control = glmerControl(
                optimizer = "optimx",
                optCtrl = list(method='nlminb'))
               ) 
Loading required namespace: optimx
broom.mixed::tidy(model_test)
# A tibble: 7 × 7
  effect   group   term                    estimate std.error statistic  p.value
  <chr>    <chr>   <chr>                      <dbl>     <dbl>     <dbl>    <dbl>
1 fixed    <NA>    (Intercept)               -2.81      1.28      -2.20  2.78e-2
2 fixed    <NA>    tx                         4.49      1.74       2.59  9.70e-3
3 fixed    <NA>    sqrt_time                  3.08      0.768      4.01  6.01e-5
4 fixed    <NA>    tx:sqrt_time              -2.13      0.932     -2.29  2.21e-2
5 ran_pars subject sd__(Intercept)            5.14     NA         NA    NA      
6 ran_pars subject cor__(Intercept).sqrt_…   -0.962    NA         NA    NA      
7 ran_pars subject sd__sqrt_time              2.54     NA         NA    NA      
broom.mixed::glance(model_test)
# A tibble: 1 × 7
   nobs sigma logLik   AIC   BIC deviance df.residual
  <int> <dbl>  <dbl> <dbl> <dbl>    <dbl>       <int>
1   600     1  -194.  402.  433.     220.         593