Fix/nn transformer block #367

Draft · wants to merge 43 commits into base: main

Changes from all commits · 43 commits
3fca75c
feat(layer): add tokenizers for categoricals and numerics
sebffischer Feb 6, 2025
9ff800a
Update news
sebffischer Feb 6, 2025
64f1b09
both versions of NEWS
cxzhang4 Feb 13, 2025
4bc5446
init
cxzhang4 Feb 14, 2025
8580b29
Merge branch 'main' into fix/nn_transformer_block
cxzhang4 Feb 14, 2025
b7d1f6b
both news
cxzhang4 Feb 14, 2025
91b9792
Merge branch 'feat/reglu-geglu' into fix/nn_transformer_block
cxzhang4 Feb 14, 2025
4ed0fce
conda env for torch 0.13, some comments about logic of old implementa…
cxzhang4 Mar 13, 2025
0ac124f
comments/notes, old implementation
cxzhang4 Mar 14, 2025
1b2b594
comments and such, assertion on new code looks ok
cxzhang4 Mar 14, 2025
b82beeb
copying old style/logic back in
cxzhang4 Mar 14, 2025
8aeac29
comments, move kv_compression to the layer module and out of the bloc…
cxzhang4 Mar 14, 2025
8f71b22
tests pass
cxzhang4 Mar 15, 2025
5851e09
this version passes tests I think
cxzhang4 Mar 15, 2025
d4160ee
factored out head
cxzhang4 Mar 15, 2025
9419328
idk
cxzhang4 Mar 15, 2025
b0bde4e
skeleton code for a mlr3 task
cxzhang4 Mar 15, 2025
6f84354
sketches of graph
cxzhang4 Mar 17, 2025
98ab02a
Copilot PipeOps
cxzhang4 Mar 17, 2025
4ca8640
cls token should be ok
cxzhang4 Mar 17, 2025
7bfd4a1
re-implement feedback from old PR
cxzhang4 Mar 17, 2025
89116e4
added a comment reminder for old feedback
cxzhang4 Mar 17, 2025
d287d45
modified shapes_out for the PipeOps
cxzhang4 Mar 18, 2025
4af8610
graph looks better
cxzhang4 Mar 18, 2025
c341781
idrk
cxzhang4 Mar 18, 2025
f4480a1
CoPilot reglu geglu
cxzhang4 Mar 18, 2025
7335e33
added back first_layer flag
cxzhang4 Mar 20, 2025
91f6049
graph still buggy
cxzhang4 Mar 21, 2025
80179ef
Merge branch 'main' into fix/nn_transformer_block
cxzhang4 Mar 21, 2025
6b90805
idk
cxzhang4 Mar 21, 2025
b1aaca7
merge main
cxzhang4 Apr 10, 2025
fe5eeaa
prototytping
cxzhang4 Apr 10, 2025
579c86f
think the tests for old and refactored look ok
cxzhang4 Apr 10, 2025
93a269c
added query_idx param
cxzhang4 Apr 10, 2025
8d56f83
removed browser statements
cxzhang4 Apr 10, 2025
ac672cf
graph trains
cxzhang4 Apr 10, 2025
5c180ca
need to keep debugging shapes
cxzhang4 Apr 10, 2025
88c5967
need to continue debugging
cxzhang4 Apr 10, 2025
ae6ee0e
more attic stuff (to be deleted), don't drop dimension
cxzhang4 Apr 11, 2025
86c8b90
add test file
cxzhang4 Apr 11, 2025
0e359e2
rename
cxzhang4 Apr 13, 2025
a22403b
more name
cxzhang4 Apr 13, 2025
04e9c0b
use new names in attic code
cxzhang4 Apr 13, 2025
2 changes: 2 additions & 0 deletions DESCRIPTION
@@ -113,6 +113,7 @@ Collate:
'PipeOpTorchAvgPool.R'
'PipeOpTorchBatchNorm.R'
'PipeOpTorchBlock.R'
'PipeOpTorchCLS.R'
'PipeOpTorchCallbacks.R'
'PipeOpTorchConv.R'
'PipeOpTorchConvTranspose.R'
@@ -131,6 +132,7 @@ Collate:
'PipeOpTorchReshape.R'
'PipeOpTorchSoftmax.R'
'PipeOpTorchTokenizer.R'
'PipeOpTorchTransformerLayer.R'
'Select.R'
'TaskClassif_cifar.R'
'TaskClassif_lazy_iris.R'
2 changes: 2 additions & 0 deletions NAMESPACE
@@ -106,6 +106,7 @@ export(PipeOpTorchFlatten)
export(PipeOpTorchFn)
export(PipeOpTorchGELU)
export(PipeOpTorchGLU)
export(PipeOpTorchGeGLU)
export(PipeOpTorchHardShrink)
export(PipeOpTorchHardSigmoid)
export(PipeOpTorchHardTanh)
@@ -133,6 +134,7 @@ export(PipeOpTorchModelRegr)
export(PipeOpTorchOptimizer)
export(PipeOpTorchPReLU)
export(PipeOpTorchRReLU)
export(PipeOpTorchReGLU)
export(PipeOpTorchReLU)
export(PipeOpTorchReLU6)
export(PipeOpTorchReshape)
10 changes: 10 additions & 0 deletions NEWS.md
@@ -56,6 +56,16 @@
* The `dataset` of a learner must no longer return the tensors on the specified `device`,
which allows for parallel dataloading on GPUs.
* `PipeOpBlock` should no longer create ID clashes with other PipeOps in the graph (#260).
Also, the improvement is calculated as the difference between the current and the best score,
not the current and the previous score.
* feat: Added multimodal melanoma and cifar{10, 100} example tasks.
* feat: Added a callback to iteratively unfreeze parameters for finetuning.
* fix: torch learners can now be used with `AutoTuner`.
* feat: Added different learning rate schedulers as callbacks.
* feat: `PipeOpBlock` should no longer create ID clashes with other PipeOps in the graph (#260)
* fix: `device` is no longer part of the `dataset` which allows for parallel dataloading
on GPUs.
* feat: Add tokenizers for numeric and categorical features.

# mlr3torch 0.1.2

118 changes: 118 additions & 0 deletions R/PipeOpTorchActivation.R
@@ -798,3 +798,121 @@ PipeOpTorchGLU = R6Class("PipeOpTorchGLU",
)

register_po("nn_glu", PipeOpTorchGLU)

# ReGLU: split the input in half along the last dimension and gate one half
# with the ReLU of the other (a GLU variant).
reglu = function(x) {
  assert_true(tail(x$shape, 1) %% 2 == 0)
  chunked = x$chunk(2, dim = -1)
  a = chunked[[1]]
  b = chunked[[2]]
  return(a * nnf_relu(b))
}

# GeGLU: like ReGLU, but gate with GELU instead of ReLU.
geglu = function(x) {
  assert_true(tail(x$shape, 1) %% 2 == 0)
  chunked = x$chunk(2, dim = -1)
  a = chunked[[1]]
  b = chunked[[2]]
  return(a * nnf_gelu(b))
}

nn_reglu = nn_module(
"nn_reglu",
forward = function(input) {
return(reglu(input))
}
)

nn_geglu = nn_module(
"nn_geglu",
forward = function(input) {
return(geglu(input))
}
)

#' @title ReGLU Activation Function
#'
#' @description
#' Rectified Gated Linear Unit (ReGLU) activation function. Splits the input in half
#' along the last dimension and multiplies one half by the ReLU of the other,
#' so the output's last dimension is half that of the input.
#' @section Parameters:
#' No parameters.
#' @templateVar id nn_reglu
#' @template pipeop_torch_channels_default
#' @template pipeop_torch
#' @template pipeop_torch_example
#'
#' @export
PipeOpTorchReGLU = R6Class("PipeOpTorchReGLU",
inherit = PipeOpTorch,
public = list(
#' @description Creates a new instance of this [R6][R6::R6Class] class.
#' @template params_pipelines
initialize = function(id = "nn_reglu", param_vals = list()) {
param_set = ps()
super$initialize(
id = id,
param_set = param_set,
param_vals = param_vals,
module_generator = nn_reglu,
tags = "activation"
)
}
),
private = list(
.shapes_out = function(shapes_in, param_vals, task) {
shape = shapes_in[[1L]]
d_new = tail(shape, 1) / 2
if (test_integerish(d_new)) {
shape[length(shape)] = d_new
list(shape)
} else {
stopf("Last dimension of input tensor must be divisible by 2.")
}
}
)
)

register_po("nn_reglu", PipeOpTorchReGLU)

#' @title GeGLU Activation Function
#'
#' @description
#' GELU-based Gated Linear Unit (GeGLU) activation function. Like ReGLU, but gates
#' with GELU instead of ReLU; the output's last dimension is half that of the input.
#' @section Parameters:
#' No parameters.
#' @templateVar id nn_geglu
#' @template pipeop_torch_channels_default
#' @template pipeop_torch
#' @template pipeop_torch_example
#'
#' @export
PipeOpTorchGeGLU = R6Class("PipeOpTorchGeGLU",
inherit = PipeOpTorch,
public = list(
#' @description Creates a new instance of this [R6][R6::R6Class] class.
#' @template params_pipelines
initialize = function(id = "nn_geglu", param_vals = list()) {
param_set = ps()
super$initialize(
id = id,
param_set = param_set,
param_vals = param_vals,
module_generator = nn_geglu,
tags = "activation"
)
}
),
private = list(
.shapes_out = function(shapes_in, param_vals, task) {
shape = shapes_in[[1L]]
d_new = tail(shape, 1) / 2
if (test_integerish(d_new)) {
shape[length(shape)] = d_new
list(shape)
} else {
stopf("Last dimension of input tensor must be divisible by 2.")
}
}
)
)

register_po("nn_geglu", PipeOpTorchGeGLU)
71 changes: 71 additions & 0 deletions R/PipeOpTorchCLS.R
@@ -0,0 +1,71 @@
#' @title PipeOpTorchCLS
#' @description PipeOp that appends a learnable [CLS] token to the input.
#' The token is concatenated at the end of the token dimension (dim 2 of a
#' `(batch, n_tokens, d_token)` tensor), so the number of tokens grows by one.
PipeOpTorchCLS = R6::R6Class("PipeOpTorchCLS",
inherit = PipeOpTorch,
public = list(
    #' @description Create a new instance of this [R6][R6::R6Class] class.
    #' @param id (`character(1)`)\cr
    #'   Identifier of the resulting object.
    #' @param param_vals (named `list()`)\cr
    #'   Parameter values to be set after construction.
initialize = function(id = "cls", param_vals = list()) {
param_set = ps(
d_token = p_uty(custom_check = function(input) {
check_integerish(input, lower = 1L, any.missing = FALSE, len = 1)
}),
initialization = p_fct(levels = c("uniform", "normal"))
)

super$initialize(
id = id,
module_generator = nn_cls_token,
param_vals = param_vals,
param_set = param_set
)
}
),
private = list(
.shapes_out = function(shapes_in, param_vals, task) {
      # TODO: assert on the number of dimensions? This is fine for tabular
      # (batch, n_tokens, d_token) input, but would not generalize to e.g. NLP inputs.
      shapes_out = shapes_in$input
      shapes_out[2] = shapes_out[2] + 1
return(list(shapes_out))
}
)
)
register_po("cls", PipeOpTorchCLS)

initialize_token_ = function(x, d, initialization = "") {
  assert_choice(initialization, c("uniform", "normal"))
  # both schemes are scaled by 1 / sqrt(d)
  d_sqrt_inv = 1 / sqrt(d)
  if (initialization == "uniform") {
    return(nn_init_uniform_(x, a = -d_sqrt_inv, b = d_sqrt_inv))
  } else {
    return(nn_init_normal_(x, std = d_sqrt_inv))
  }
}
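
# Worked example (sketch, not part of the diff): for d = 16, d_sqrt_inv = 0.25,
# so "uniform" draws from U(-0.25, 0.25) and "normal" from N(0, sd = 0.25).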

nn_cls_token = nn_module(
"nn_cls_token",
initialize = function(d_token, initialization) {
self$d_token = d_token
self$weight = nn_parameter(torch_empty(d_token))
self$initialization = initialization
self$reset_parameters()
},
reset_parameters = function() {
initialize_token_(self$weight, d = self$d_token, self$initialization)
},
  expand = function(...) {
    leading_dimensions = list(...)
    if (length(leading_dimensions) == 0) {
      return(self$weight)
    }
    # view the token as (1, ..., 1, d_token), then expand it to the requested
    # leading dimensions
    new_dims = rep(1, length(leading_dimensions) - 1)
    return(self$weight$view(c(new_dims, -1))$expand(c(leading_dimensions, -1)))
  },
  forward = function(input) {
    # append one copy of the token per batch element along the token dimension (dim 2)
    return(torch_cat(list(input, self$expand(input$shape[1], 1)), dim = 2))
  }
)
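
A minimal usage sketch (not part of the diff; assumes the torch package and the module above are loaded), showing the token dimension growing by one:

tok = nn_cls_token(d_token = 4, initialization = "uniform")
x = torch_randn(32, 10, 4)  # (batch, n_tokens, d_token)
out = tok(x)
out$shape                   # 32 11 4: the [CLS] token is appended as the 11th token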