R/tune_optuna.R
tune_random_machines_optuna.Rd

Optimized version of tune_random_machines that uses Bayesian Optimization (Optuna).
It iterates over a list of kernel configurations and runs a parallelized
Optuna search for each one to find the best hyperparameters.
tune_random_machines_optuna(
data,
time_col = "t",
delta_col = "delta",
kernel_mix,
search_spaces,
n_trials = 50L,
cv = 5L,
seed = NULL,
verbose = 1L,
cores = parallel::detectCores(),
...
)

data: A data.frame containing training data.
time_col: Name of the time column.

delta_col: Name of the event column.

kernel_mix: A named list of base kernel configurations. This serves as the
baseline. Keys must match those in search_spaces.

search_spaces: A named list of search spaces. Each element should be a list
of parameters defined using opt_float, opt_int, etc.

n_trials: Integer. Total number of trials per kernel.

cv: Integer. Number of CV folds.

seed: Integer. Random seed.

verbose: Integer/Logical. Verbosity level.

cores: Integer. Number of parallel cores to use for the search.

...: Additional global arguments passed to all kernels.
An object of class "random_machines_tune_optuna" which also
inherits from "random_machines_tune" to ensure compatibility with
generic functions like as_kernels.
This function is the "Bridge" between the raw tuning and the ensemble training. It allows you to tune standard kernels (like "linear", "rbf") and custom function-based kernels (like Wavelet) in a single call.
The results can be passed to as_kernels to generate the final
list of kernels for random_machines.
if (FALSE) { # \dontrun{
# Example is gated: it requires the Python 'optuna' module (via reticulate)
# and the 'parallel' package; it is skipped when either is unavailable.
if (reticulate::py_module_available("optuna") && requireNamespace("parallel")) {
library(FastSurvivalSVM)
# =========================================================================
# 1. Data Generation
# =========================================================================
# NOTE(review): data_generation() is presumably exported by FastSurvivalSVM;
# it appears to simulate survival data with prop_cen censoring — confirm.
set.seed(42)
df_train <- data_generation(n = 250, prop_cen = 0.25)
df_test <- data_generation(n = 100, prop_cen = 0.25)
# =========================================================================
# 2. Define Custom Kernel Functions (Math Only)
# =========================================================================
# Custom Polynomial Kernel: k(x, z) = (x'z + coef0)^degree
# Returns a single scalar similarity value for two feature vectors.
my_poly <- function(x, z, degree, coef0) {
  dot_product <- sum(x * z)
  (dot_product + coef0)^degree
}
# Custom Wavelet Kernel: product over dimensions of
# cos(1.75 * u_j) * exp(-0.5 * u_j^2), where u = (x - z) / A.
# A is the dilation (scale) parameter; returns a scalar similarity.
my_wavelet <- function(x, z, A) {
  scaled_diff <- (as.numeric(x) - as.numeric(z)) / A
  per_dim <- cos(1.75 * scaled_diff) * exp(-0.5 * scaled_diff^2)
  prod(per_dim)
}
# =========================================================================
# 3. Tuning Workflow (Full Pipeline)
# =========================================================================
# A. Base Configurations (Fixed settings)
# rank_ratio = 0.0 implies Regression mode (Learning Survival Time)
# Names here (linear_std, rbf_std, ...) must match the names used in
# search_spaces below; 'kernel' may be a string or a custom R function.
kernel_mix <- list(
linear_std = list(kernel = "linear", rank_ratio = 0.0),
rbf_std = list(kernel = "rbf", rank_ratio = 0.0),
poly_my = list(kernel = my_poly, rank_ratio = 0.0),
wave_my = list(kernel = my_wavelet, rank_ratio = 0.0)
)
# B. Search Spaces (Ranges for Optuna)
# Each entry lists the hyperparameters Optuna tunes for that kernel;
# log = TRUE samples on a log scale (suited to regularization terms).
search_spaces <- list(
# Tune 'alpha' for Linear
linear_std = list(
alpha = opt_float(0.01, 10, log = TRUE)
),
# Tune 'alpha' and 'gamma' for RBF
rbf_std = list(
alpha = opt_float(0.01, 10, log = TRUE),
gamma = opt_float(0.001, 1, log = TRUE)
),
# Tune 'alpha', 'degree', 'coef0' for Custom Poly
# Note: 'degree' and 'coef0' are forwarded to my_poly's own arguments.
poly_my = list(
alpha = opt_float(0.01, 10, log = TRUE),
degree = opt_int(2, 4),
coef0 = opt_float(0, 2)
),
# Tune 'alpha' and 'A' for Custom Wavelet ('A' is my_wavelet's scale)
wave_my = list(
alpha = opt_float(0.01, 10, log = TRUE),
A = opt_float(0.5, 2.5)
)
)
# C. Run Optuna Optimization using ALL available cores
# - Spawns workers on all cores.
# - Each worker connects to a shared SQLite DB to avoid conflicts.
# - Kernels are tuned sequentially, but trials run in parallel.
cat("Starting Optuna Tuning...\n")
tune_res <- tune_random_machines_optuna(
data = df_train,
time_col = "tempo",
delta_col = "cens",
kernel_mix = kernel_mix,
search_spaces = search_spaces,
n_trials = 20, # 20 trials per kernel
cv = 3, # 3-fold CV inside each trial
cores = parallel::detectCores(), # Use all cores
seed = 123 # Reproducible search
)
print(tune_res)
# =========================================================================
# 4. Train Random Machines (Ensemble)
# =========================================================================
# D. Extract best parameters found by Optuna
# as_kernels() merges the tuned values back into the base kernel_mix.
final_kernels <- as_kernels(tune_res, kernel_mix)
cat("Training Random Machines with Optimized Kernels...\n")
model_rm <- random_machines(
data = df_train,
newdata = df_test,
time_col = "tempo",
delta_col = "cens",
kernels = final_kernels, # Use the tuned parameters
B = 50, # Number of bootstrap samples
prop_holdout = 0.2, # Internal holdout for weights
cores = parallel::detectCores() # Parallel training
)
# =========================================================================
# 5. Evaluate
# =========================================================================
# NOTE(review): score() appears to return a concordance index on new data
# (higher is better) — confirm against the package's score() documentation.
print(model_rm)
cidx <- score(model_rm, df_test)
cat(sprintf("Final Test C-Index: %.4f\n", cidx))
}
} # }