Adding internal reduce_instance_dims=False support to tf_utils.reduce_batch_minus_min_and_max_per_key

tf-transform-team · tfx-copybara · commit 8d60ea3ba93f · 2022-06-09T17:16:13.000-07:00
PiperOrigin-RevId: 454047295
diff --git a/tensorflow_transform/tf_utils.py b/tensorflow_transform/tf_utils.py
@@ -1540,7 +1540,9 @@ def reduce_batch_minus_min_and_max(
 
 def reduce_batch_minus_min_and_max_per_key(
     x: common_types.TensorType,
-    key: common_types.TensorType) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
+    key: common_types.TensorType,
+    reduce_instance_dims: bool = True
+) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
   """Computes the -min and max of a tensor x.
 
   Args:
@@ -1552,6 +1554,10 @@ def reduce_batch_minus_min_and_max_per_key(
         everything except values,
         3. The axis=1 index of each element of sparse x matches its index of
         dense key.
+    reduce_instance_dims: A bool indicating whether this should collapse the
+      batch and instance dimensions to arrive at a single scalar output per
+      key, or only collapse the batch dimension and output, per key, a tensor
+      with the same shape as one input instance.
   Returns:
     A 3-tuple containing the `Tensor`s (key_vocab, min_per_key, max_per_key).
   """
@@ -1561,10 +1567,16 @@ def reduce_batch_minus_min_and_max_per_key(
   elif x.dtype == tf.uint32 or x.dtype == tf.uint64:
     raise TypeError('Tensor type %r is not supported' % x.dtype)
 
+  if not reduce_instance_dims and isinstance(
+      x, (tf.SparseTensor, tf.RaggedTensor)):
+    raise NotImplementedError(
+        'Elementwise reduction of composite tensors is not supported'
+    )
+
   x, key = _validate_and_get_dense_value_key_inputs(x, key)
 
   def get_batch_max_per_key(tensor, key_uniques):  # pylint: disable=missing-docstring
-    if tensor.get_shape().ndims < 2:
+    if not reduce_instance_dims or tensor.get_shape().ndims < 2:
       row_maxes = tensor
     else:
       row_maxes = tf.reduce_max(
diff --git a/tensorflow_transform/tf_utils_test.py b/tensorflow_transform/tf_utils_test.py
@@ -1627,6 +1627,7 @@ def _reduce_batch_minus_min_and_max(x):
                   values=[3, 2, -1, 3],
                   dense_shape=[4, 5]),
               key=['a', 'a', 'a', 'b'],
+              reduce_instance_dims=True,
               expected_key_vocab=[b'a', b'b'],
               expected_x_minus_min=[1, -3],
               expected_x_max=[3, 3],
@@ -1638,25 +1639,52 @@ def _reduce_batch_minus_min_and_max(x):
               testcase_name='float',
               x=[[1], [5], [2], [3]],
               key=['a', 'a', 'a', 'b'],
+              reduce_instance_dims=True,
               expected_key_vocab=[b'a', b'b'],
               expected_x_minus_min=[-1, -3],
               expected_x_max=[5, 3],
               input_signature=[
                   tf.TensorSpec([None, None], tf.float32),
                   tf.TensorSpec([None], tf.string)
               ]),
+          dict(
+              testcase_name='float_elementwise',
+              x=[[1], [5], [2], [3]],
+              key=['a', 'a', 'a', 'b'],
+              reduce_instance_dims=False,
+              expected_key_vocab=[b'a', b'b'],
+              expected_x_minus_min=[[-1], [-3]],
+              expected_x_max=[[5], [3]],
+              input_signature=[
+                  tf.TensorSpec([None, None], tf.float32),
+                  tf.TensorSpec([None], tf.string)
+              ]),
           dict(
               testcase_name='float3dims',
               x=[[[1, 5], [1, 1]], [[5, 1], [5, 5]], [[2, 2], [2, 5]],
                  [[3, -3], [3, 3]]],
               key=['a', 'a', 'a', 'b'],
+              reduce_instance_dims=True,
               expected_key_vocab=[b'a', b'b'],
               expected_x_minus_min=[-1, 3],
               expected_x_max=[5, 3],
               input_signature=[
                   tf.TensorSpec([None, None, None], tf.float32),
                   tf.TensorSpec([None], tf.string)
               ]),
+          dict(
+              testcase_name='float3dims_elementwise',
+              x=[[[1, 5], [1, 1]], [[5, 1], [5, 5]], [[2, 2], [2, 5]],
+                 [[3, -3], [3, 3]]],
+              key=['a', 'a', 'a', 'b'],
+              reduce_instance_dims=False,
+              expected_key_vocab=[b'a', b'b'],
+              expected_x_minus_min=[[[-1, -1], [-1, -1]], [[-3, 3], [-3, -3]]],
+              expected_x_max=[[[5, 5], [5, 5]], [[3, -3], [3, 3]]],
+              input_signature=[
+                  tf.TensorSpec([None, None, None], tf.float32),
+                  tf.TensorSpec([None], tf.string)
+              ]),
           dict(
               testcase_name='ragged',
               x=tf.compat.v1.ragged.RaggedTensorValue(
@@ -1673,6 +1701,7 @@ def _reduce_batch_minus_min_and_max(x):
                           row_splits=np.array([0, 2, 3, 4, 5])),
                       row_splits=np.array([0, 2, 3, 4])),
                   row_splits=np.array([0, 2, 3])),
+              reduce_instance_dims=True,
               expected_key_vocab=[b'a', b'b'],
               expected_x_minus_min=[-2., -3.],
               expected_x_max=[4., 5.],
@@ -1682,12 +1711,13 @@ def _reduce_batch_minus_min_and_max(x):
               ]),
       ]))
   def test_reduce_batch_minus_min_and_max_per_key(
-      self, x, key, expected_key_vocab, expected_x_minus_min, expected_x_max,
-      input_signature, function_handler):
+      self, x, key, reduce_instance_dims, expected_key_vocab,
+      expected_x_minus_min, expected_x_max, input_signature, function_handler):
 
     @function_handler(input_signature=input_signature)
     def _reduce_batch_minus_min_and_max_per_key(x, key):
-      return tf_utils.reduce_batch_minus_min_and_max_per_key(x, key)
+      return tf_utils.reduce_batch_minus_min_and_max_per_key(
+          x, key, reduce_instance_dims=reduce_instance_dims)
 
     key_vocab, x_minus_min, x_max = _reduce_batch_minus_min_and_max_per_key(
         x, key)