Introduction 🚀

This vignette presents a fully executable, end-to-end workflow for building
Random Machines for survival analysis with the FastSurvivalSVM package.

To guarantee that the vignette runs during rendering (CRAN / pkgdown / local), we deliberately use:

  • small sample size,
  • small tuning grids,
  • cores = 1 everywhere.

The goal is correctness and clarity, not maximum performance.

1. Data Preparation 📦

library(FastSurvivalSVM)

# Fix the RNG seed so the simulated data, the train/test split, and all
# downstream tuning results are reproducible across renders.
set.seed(42)

# Simulate a small survival dataset: n = 120 observations with ~20% censoring.
# Columns (per the head() output below): tempo = observed time, cens = event
# indicator, x1..x3 = covariates — confirm against ?data_generation.
df <- data_generation(n = 120, prop_cen = 0.20)

# Random 80/40 train/test split by row index.
train_idx <- sample(seq_len(nrow(df)), 80)
train_df  <- df[train_idx, ]
test_df   <- df[-train_idx, ]

head(train_df)
#>         tempo cens         x1       x2        x3
#> 92  0.7517396    1  0.5238261 6.073944 2.4780040
#> 10  1.7351947    1  0.9372859 2.359033 2.8224813
#> 96  0.2484542    1  0.1392074 2.074830 0.3553029
#> 110 0.3876448    1  1.1191610 4.500473 0.6483256
#> 27  1.4115221    1  0.7427306 1.038014 0.2989079
#> 97  2.1221840    1 -0.1317387 1.735824 0.7626209

2. Custom Kernels 🧠

2.1 Wavelet Kernel 🌊

# Wavelet kernel: product over coordinates of a cosine-modulated Gaussian
# applied to the scaled difference (x - z) / A.
#
# @param x,z Numeric vectors (coerced with as.numeric) of equal length.
# @param A   Positive dilation (scale) parameter.
# @return A single numeric kernel value; equals 1 when x == z.
wavelet_kernel <- function(x, z, A) {
  diff_scaled <- (as.numeric(x) - as.numeric(z)) / A
  per_coord   <- cos(1.75 * diff_scaled) * exp(-0.5 * diff_scaled^2)
  prod(per_coord)
}

2.2 Polynomial Kernel 🧮

# Polynomial kernel: (<x, z> + coef0)^degree.
#
# @param x,z    Numeric vectors (coerced with as.numeric) of equal length.
# @param degree Polynomial degree.
# @param coef0  Additive constant inside the power.
# @return A single numeric kernel value.
poly_kernel <- function(x, z, degree, coef0) {
  xv <- as.numeric(x)
  zv <- as.numeric(z)
  dot_prod <- sum(xv * zv)
  (dot_prod + coef0)^degree
}

3. Tuning a Single SVM with tune_fastsvm() 🔧

# Hyper-parameter grid for a single RBF survival SVM.
# rank_ratio = 0 presumably selects the pure regression objective — confirm
# against ?tune_fastsvm. alpha and gamma are each searched over two values
# (a deliberately tiny grid so the vignette renders quickly).
grid_rbf <- list(
  kernel     = "rbf",
  rank_ratio = 0,
  alpha      = c(0.1, 1),
  gamma      = c(0.01, 0.1)
)

# Grid search with 3-fold cross-validation on the training data.
# time_col / delta_col name the survival time and event-indicator columns;
# cores = 1 keeps the chunk portable during rendering.
rbf_tune <- tune_fastsvm(
  data       = train_df,
  time_col   = "tempo",
  delta_col  = "cens",
  param_grid = grid_rbf,
  cv         = 3,
  cores      = 1,
  verbose    = 0
)

# Print the grid-search summary: best cross-validated C-index and the
# winning parameter combination.
rbf_tune
#> 
#> ── Grid Search Result ──
#> 
#>  Best C-Index: 0.7793
#> Best Parameters:
#> • alpha: 1
#> • gamma: 0.1
#> • kernel: rbf
#> • rank_ratio: 0

4. Kernel Mix and Parameter Grids 🧩

# Kernel mix: one named entry per candidate base learner. Built-in kernels
# are identified by string ("linear", "rbf"); the custom kernels omit the
# 'kernel' entry here because it is supplied via grid_kernel() in the
# matching param_grids entry below.
kernel_mix <- list(
  linear_std = list(kernel = "linear", rank_ratio = 0),
  rbf_std    = list(kernel = "rbf",    rank_ratio = 0),
  wavelet_k  = list(rank_ratio = 0),
  poly_k     = list(rank_ratio = 0)
)

# Per-kernel tuning grids, keyed by the same names as kernel_mix.
# grid_kernel() wraps a custom kernel function together with the grid of
# its own hyper-parameters (A for the wavelet; degree/coef0 for the
# polynomial). Grids are intentionally tiny for fast rendering.
param_grids <- list(
  linear_std = list(alpha = c(0.1, 1)),
  rbf_std    = list(alpha = c(0.1, 1), gamma = c(0.01)),
  wavelet_k  = list(
    kernel = grid_kernel(wavelet_kernel, A = c(1)),
    alpha  = c(0.1, 1)
  ),
  poly_k = list(
    kernel = grid_kernel(poly_kernel, degree = c(2), coef0 = 1),
    alpha  = c(0.1, 1)
  )
)

5. Joint Tuning with tune_random_machines() 🔁

# Tune every kernel in kernel_mix jointly, each over its own grid from
# param_grids, using 3-fold cross-validation on the training data.
tune_res <- tune_random_machines(
  data        = train_df,
  time_col    = "tempo",
  delta_col   = "cens",
  kernel_mix  = kernel_mix,
  param_grids = param_grids,
  cv          = 3,
  cores       = 1,
  verbose     = 0
)

# Summary table: one row per kernel with its cross-validated C-index and
# a status flag.
tune_res
#> 
#> ── Random Machines Tuning Summary ──────────────────────────────────────────────
#>      Kernel   C_Index Status
#>      poly_k 0.7520035     OK
#>     rbf_std 0.6858794     OK
#>  linear_std 0.6362080     OK
#>   wavelet_k 0.6281240     OK

6. Bridging with as_kernels() 🌉

# Merge the tuning winners back into the kernel mix: for each kernel, the
# best parameter values found by tune_random_machines() are combined with
# its kernel_mix entry, yielding the ready-to-train kernel list.
final_kernels <- as_kernels(tune_res, kernel_mix)
final_kernels
#> $linear_std
#> $linear_std$kernel
#> [1] "linear"
#> 
#> $linear_std$rank_ratio
#> [1] 0
#> 
#> $linear_std$alpha
#> [1] 0.1
#> 
#> 
#> $rbf_std
#> $rbf_std$kernel
#> [1] "rbf"
#> 
#> $rbf_std$rank_ratio
#> [1] 0
#> 
#> $rbf_std$alpha
#> [1] 1
#> 
#> $rbf_std$gamma
#> [1] 0.01
#> 
#> 
#> $wavelet_k
#> $wavelet_k$rank_ratio
#> [1] 0
#> 
#> $wavelet_k$alpha
#> [1] 1
#> 
#> $wavelet_k$kernel
#> <FastSurvivalSVM Kernel>: A=1
#> 
#> 
#> $poly_k
#> $poly_k$rank_ratio
#> [1] 0
#> 
#> $poly_k$alpha
#> [1] 1
#> 
#> $poly_k$kernel
#> <FastSurvivalSVM Kernel>: degree=2_coef0=1

7. Training Random Machines 🚜

# Train the Random Machines ensemble on train_df and predict on test_df.
# B = 5 bootstrap models (kept tiny for rendering); mtry = 2 features per
# model; prop_holdout = 0.20 reserves 20% of the training rows (16 of 80,
# per the log below) to estimate kernel selection weights. crop = 0.1 is
# a threshold applied to those weights — presumably kernels below it are
# dropped; confirm against ?random_machines.
rm_model <- random_machines(
  data         = train_df,
  newdata      = test_df,
  time_col     = "tempo",
  delta_col    = "cens",
  kernels      = final_kernels,
  B            = 5,
  mtry         = 2,
  crop         = 0.1,
  prop_holdout = 0.20,
  cores        = 1,
  .progress    = FALSE
)
#> 
#> ── 🚀 Random Machines (Kernel Survival SVM) ────────────────────────────────────
#>  Starting Random Machines (B=5, mtry=2) on 1 cores.
#>  Kernel weights via Holdout: 64 training / 16 validation.
#>  Computing kernel weights...
#>  Executing parallel bootstrap...
#>  Done. Valid Models: 5/5. Mean OOB: 0.6977

# Print the fitted ensemble: model count, per-kernel bootstrap selection
# frequencies, and the holdout kernel weights.
rm_model
#> 
#> ── 📦 Random Machines (FastKernelSurvivalSVM) ──────────────────────────────────
#> • Models Trained: 5
#> • Features (mtry): 2
#> • Mean OOB C-index: 0.6977
#> • Crop Threshold: 0.1
#> 
#> ── 📊 Kernel Usage (Bootstrap Selection) ──
#> 
#> ────────────────────────────────────────────────────────────────────────────────
#> Kernel   | Count | Probability
#> ────────────────────────────────────────────────────────────────────────────────
#> poly_k     |     2 |      0.4000
#> rbf_std    |     2 |      0.4000
#> linear_std |     1 |      0.2000
#> ────────────────────────────────────────────────────────────────────────────────
#> 
#> ── ⚖️ Kernel Weights (Holdout Probabilities) ──
#> 
#> ────────────────────────────────────────────────────────────────────────────────
#> Kernel   | Probability | Status
#> ────────────────────────────────────────────────────────────────────────────────
#> rbf_std    |      0.3131 | ✅ Selected
#> poly_k     |      0.2967 | ✅ Selected
#> wavelet_k  |      0.2026 | ✅ Selected
#> linear_std |      0.1876 | ✅ Selected
#> ────────────────────────────────────────────────────────────────────────────────

8. Final Evaluation 🎯

# Out-of-sample concordance index of the ensemble on the held-out test set.
c_index <- score(rm_model, test_df)
cat(sprintf("Final C-index on test data: %.4f\n", c_index))
#> Final C-index on test data: 0.7726

Conclusion 🎉

This vignette intentionally uses small grids and datasets so that:

  • every chunk is executed,
  • the vignette renders reliably,
  • users can clearly see the full workflow.

For real experiments, simply increase: n, tuning grids, B, and cores.