R/tune_optuna.R
tune_random_machines_optuna.Rd

Optimized version of tune_random_machines that uses Bayesian Optimization (Optuna).
It iterates over a list of kernel configurations and runs a parallelized
Optuna search for each one to find the best hyperparameters.
tune_random_machines_optuna(
data,
time_col = "t",
delta_col = "delta",
kernel_mix,
search_spaces,
n_trials = 50L,
cv = 5L,
seed = NULL,
verbose = 1L,
cores = parallel::detectCores(),
...
)

data: A data.frame containing training data.
time_col: Name of the time column.

delta_col: Name of the event column.

kernel_mix: A named list of base kernel configurations. This serves as the
baseline. Keys must match those in search_spaces.

search_spaces: A named list of search spaces. Each element should be a list
of parameters defined using opt_float, opt_int, etc.

n_trials: Integer. Total number of trials per kernel.

cv: Integer. Number of CV folds.

seed: Integer. Random seed.

verbose: Integer/Logical. Verbosity level.

cores: Integer. Number of parallel cores to use for the search.

...: Additional global arguments passed to all kernels.
An object of class "random_machines_tune_optuna" which also
inherits from "random_machines_tune" to ensure compatibility with
generic functions like as_kernels.
This function is the "Bridge" between the raw tuning and the ensemble training. It allows you to tune standard kernels (like "linear", "rbf") and custom function-based kernels (like Wavelet) in a single call.
The results can be passed to as_kernels to generate the final
list of kernels for random_machines.
if (FALSE) { # \dontrun{
# Example is gated: it requires the Python 'optuna' module (via reticulate)
# and the 'parallel' package; it is skipped when either is unavailable.
if (reticulate::py_module_available("optuna") && requireNamespace("parallel")) {
library(FastSurvivalSVM)
# =========================================================================
# 1. Data Generation
# =========================================================================
# NOTE(review): data_generation() is presumably exported by FastSurvivalSVM;
# it appears to simulate survival data with prop_cen censoring — confirm.
set.seed(42)
df_train <- data_generation(n = 250, prop_cen = 0.25)
df_test <- data_generation(n = 100, prop_cen = 0.25)
# =========================================================================
# 2. Define Custom Kernel Functions (Math Only)
# =========================================================================
# Custom Polynomial Kernel: k(x, z) = (x'z + coef0)^degree
# Returns a single scalar similarity value for two feature vectors.
my_poly <- function(x, z, degree, coef0) {
  dot_product <- sum(x * z)
  (dot_product + coef0)^degree
}
# Custom Wavelet Kernel: product over dimensions of
# cos(1.75 * u_j) * exp(-0.5 * u_j^2), where u = (x - z) / A.
# A is the dilation (scale) parameter; returns a scalar similarity.
my_wavelet <- function(x, z, A) {
  scaled_diff <- (as.numeric(x) - as.numeric(z)) / A
  per_dim <- cos(1.75 * scaled_diff) * exp(-0.5 * scaled_diff^2)
  prod(per_dim)
}
# =========================================================================
# 3. Tuning Workflow (Full Pipeline)
# =========================================================================
# A. Base Configurations (Fixed settings)
# rank_ratio = 0.0 implies Regression mode (Learning Survival Time)
# Names here (linear_std, rbf_std, ...) must match the names used in
# search_spaces below; 'kernel' may be a string or a custom R function.
kernel_mix <- list(
linear_std = list(kernel = "linear", rank_ratio = 0.0),
rbf_std = list(kernel = "rbf", rank_ratio = 0.0),
poly_my = list(kernel = my_poly, rank_ratio = 0.0),
wave_my = list(kernel = my_wavelet, rank_ratio = 0.0)
)
# B. Search Spaces (Ranges for Optuna)
# Each entry lists the hyperparameters Optuna tunes for that kernel;
# log = TRUE samples on a log scale (suited to regularization terms).
search_spaces <- list(
# Tune 'alpha' for Linear
linear_std = list(
alpha = opt_float(0.01, 10, log = TRUE)
),
# Tune 'alpha' and 'gamma' for RBF
rbf_std = list(
alpha = opt_float(0.01, 10, log = TRUE),
gamma = opt_float(0.001, 1, log = TRUE)
),
# Tune 'alpha', 'degree', 'coef0' for Custom Poly
# Note: 'degree' and 'coef0' are forwarded to my_poly's own arguments.
poly_my = list(
alpha = opt_float(0.01, 10, log = TRUE),
degree = opt_int(2, 4),
coef0 = opt_float(0, 2)
),
# Tune 'alpha' and 'A' for Custom Wavelet ('A' is my_wavelet's scale)
wave_my = list(
alpha = opt_float(0.01, 10, log = TRUE),
A = opt_float(0.5, 2.5)
)
)
# C. Run Optuna Optimization using ALL available cores
# - Spawns workers on all cores.
# - Each worker connects to a shared SQLite DB to avoid conflicts.
# - Kernels are tuned sequentially, but trials run in parallel.
cat("Starting Optuna Tuning...\n")
tune_res <- tune_random_machines_optuna(
data = df_train,
time_col = "tempo",
delta_col = "cens",
kernel_mix = kernel_mix,
search_spaces = search_spaces,
n_trials = 20, # 20 trials per kernel
cv = 3, # 3-fold CV inside each trial
cores = parallel::detectCores(), # Use all cores
seed = 123 # Reproducible search
)
print(tune_res)
# =========================================================================
# 4. Train Random Machines (Ensemble)
# =========================================================================
# D. Extract best parameters found by Optuna
# as_kernels() merges the tuned values back into the base kernel_mix.
final_kernels <- as_kernels(tune_res, kernel_mix)
cat("Training Random Machines with Optimized Kernels...\n")
model_rm <- random_machines(
data = df_train,
newdata = df_test,
time_col = "tempo",
delta_col = "cens",
kernels = final_kernels, # Use the tuned parameters
B = 50, # Number of bootstrap samples
prop_holdout = 0.2, # Internal holdout for weights
cores = parallel::detectCores() # Parallel training
)
# =========================================================================
# 5. Evaluate
# =========================================================================
# NOTE(review): score() appears to return a concordance index on new data
# (higher is better) — confirm against the package's score() documentation.
print(model_rm)
cidx <- score(model_rm, df_test)
cat(sprintf("Final Test C-Index: %.4f\n", cidx))
}
} # }