Skip to contents

Performs Item Response Theory modeling (1PL, 2PL, 3PL, and Graded Response Model) using RTMB. Missing values (NA) in the data are automatically removed internally for efficient computation.

Usage

rtmb_irt(
  data,
  model = c("2PL", "1PL", "3PL"),
  type = c("binary", "ordered"),
  prior = list(a_log_mean = 0, a_log_sd = 0.5, b_mean = 0, b_sd = 2.5, c_alpha = 1,
    c_beta = 4, theta_sd = 1),
  init = NULL
)

Arguments

data

A data frame or matrix of item responses (N persons x J items).

model

Character string for the model type: "1PL", "2PL", or "3PL".

type

Character string for the data type: "binary" or "ordered".

prior

List of hyperparameters for prior distributions.

init

List of initial values.

Examples

  if (FALSE) { # \dontrun{
  # --- 1. Binary Data (e.g., correct/incorrect answers) ---
  # Simulate binary response data for 100 persons and 5 items
  set.seed(123)
  bin_data <- matrix(rbinom(500, size = 1, prob = 0.6), nrow = 100, ncol = 5)
  colnames(bin_data) <- paste0("Item", 1:5)

  # Introduce some missing values (NA) to demonstrate automatic handling
  #bin_data[sample(1:500, 10)] <- NA

  # Fit a 2-Parameter Logistic (2PL) model
  fit_2pl <- rtmb_irt(data = bin_data, model = "2PL", type = "binary")

  # Maximum A Posteriori (MAP) estimation
  map_2pl <- fit_2pl$optimize()
  map_2pl$summary()

  # --- 2. Ordered Data (e.g., Likert scale) ---
  # Simulate ordered response data (categories 1 to 5) with a true latent structure
  set.seed(123)
  N <- 100
  J <- 5
  K <- 4

  # True parameters
  theta <- rnorm(N, 0, 1)          # Person abilities (latent traits)
  a <- runif(J, 0.8, 2.0)          # Item discriminations

  # Item-specific thresholds (must be strictly increasing)
  b <- matrix(NA, nrow = J, ncol = K - 1)
  for (j in 1:J) {
    b[j, ] <- sort(c(-1.5, 0, 1.5) + rnorm(3, 0, 0.2))
  }

  ord_data <- matrix(NA, nrow = N, ncol = J)
  colnames(ord_data) <- paste0("Item", 1:J)

  # Generate responses based on the Graded Response Model
  for (i in 1:N) {
    for (j in 1:J) {
      # Latent continuous response (eta + standard logistic noise)
      eta <- a[j] * theta[i]
      y_star <- eta + rlogis(1)

      # Apply thresholds to determine the observed category (1 to 4)
      category <- 1
      for (k in 1:(K - 1)) {
        if (y_star > b[j, k]) category <- k + 1
      }
      ord_data[i, j] <- category
    }
  }

  # Fit a Graded Response Model (2PL for ordered data)
  fit_ord <- rtmb_irt(data = ord_data, model = "2PL", type = "ordered")
  fit_ord$print_code()

  map_ord <- fit_ord$optimize()
  map_ord$summary()

  # Note: For complex models like the Graded Response Model, the Wald CI from optimize()
  # may become extremely wide due to parameter transformations.
  # MCMC sampling is recommended for reliable interval estimation.

  # MCMC sampling for the ordered model (chains and iterations reduced)
  mcmc_ord <- fit_ord$sample(sampling = 500, warmup = 500, chains = 2)
  mcmc_ord$summary()
 } # }