This function finds the optimal parameters of an algorithm using random search

random_search_resample(
  y,
  tune_iters = NULL,
  resampling_method = NULL,
  ALGORITHM = NULL,
  grid_params = NULL,
  DATA = NULL,
  Args = NULL,
  regression = FALSE,
  re_run_params = FALSE,
  UNLABELED_TEST_DATA = NULL,
  ...
)

Arguments

y

a numeric vector

tune_iters

a number

resampling_method

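a list describing the resampling scheme, with elements method (one of 'bootstrap', 'train_test_split', 'cross_validation'), repeats, sample_rate and folds; e.g. list(method = 'cross_validation', repeats = NULL, sample_rate = NULL, folds = 5)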

ALGORITHM

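a list with two elements: package (the call that loads the package, e.g. require(randomForest)) and algorithm (the model-fitting function, e.g. randomForest)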

grid_params

a grid of parameters in form of a list

DATA

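a list including the data, e.g. list(x = X, y = y) for an algorithm with a matrix-style interface or list(formula = form, data = ALL_DATA) for an algorithm with a formula interface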

Args

a list with further arguments of the function

regression

a boolean (TRUE, FALSE)

re_run_params

a boolean (TRUE, FALSE)

UNLABELED_TEST_DATA

either NULL or a data.frame (or matrix) with the same number of columns as the initial train data

...

ellipsis to allow additional parameters

Value

a list of lists

Details

This function takes a number of arguments of an algorithm (including a grid of parameters) and, using random search, returns a list of predictions and parameters for the chosen resampling method.

Author

Lampros Mouselimis

Examples

# Example usage of random_search_resample(). The whole example is wrapped in
# `if (FALSE)` so that it is never executed automatically; copy the body into
# an R session to run it. It loads the kknn, randomForest and RWeka packages,
# which must be installed beforehand.
if (FALSE) {

  #..........................
  # MULTICLASS CLASSIFICATION
  #..........................

  library(kknn)
  data(glass)

  str(glass)

  # predictors: every column except the first and the last one
  X = glass[, -c(1, dim(glass)[2])]

  # response: the last column of the data set
  y1 = glass[, dim(glass)[2]]

  # formula of the form 'Type ~ <predictor 1> + <predictor 2> + ...'
  form <- as.formula(paste('Type ~', paste(names(X),collapse = '+')))

  y1 = c(1:length(unique(y1)))[ match(y1, sort(unique(y1))) ]     # labels should begin from 1:Inf

  ALL_DATA = glass
  ALL_DATA$Type = as.factor(y1)

  #........................
  # randomForest classifier
  #........................

  # candidate values for each tuning parameter
  wrap_grid_args3 = list(ntree = seq(30, 50, 5), mtry = c(2:3), nodesize = seq(5, 15, 5))

  res_rf = random_search_resample(as.factor(y1), tune_iters = 15,

                                  resampling_method = list(method = 'cross_validation',

                                                           repeats = NULL,

                                                           sample_rate = NULL,

                                                           folds = 5),

                                  ALGORITHM = list(package = require(randomForest),

                                                   algorithm = randomForest),

                                  grid_params = wrap_grid_args3,

                                  DATA = list(x = X, y = as.factor(y1)),

                                  Args = NULL,

                                  regression = FALSE, re_run_params = FALSE)

  #............
  # Logit boost
  #............

  #...........................
  # RWeka::WOW("LogitBoost") : gives info for the parameters of the RWeka control list
  #...........................

  lb_lst = list(control = RWeka::Weka_control(H = c(1.0, 0.5),

                                              I = seq(10, 30, 5),

                                              Q = c(TRUE, FALSE),

                                              O = 4))

  res_log_boost = random_search_resample(as.factor(y1), tune_iters = 15,

                                         resampling_method = list(method = 'cross_validation',

                                                                  repeats = NULL,

                                                                  sample_rate = NULL,

                                                                  folds = 5),

                                         ALGORITHM = list(package = require(RWeka),

                                                          algorithm = LogitBoost),

                                         grid_params = lb_lst,

                                         DATA = list(formula = form, data = ALL_DATA),

                                         Args = NULL,

                                         regression = FALSE, re_run_params = FALSE)
}