diff --git a/__af_version__.py b/__af_version__.py
index 9b8a5f40c..dfa78b826 100644
--- a/__af_version__.py
+++ b/__af_version__.py
@@ -9,6 +9,6 @@
 # http://arrayfire.com/licenses/BSD-3-Clause
 ########################################################
 
-version = "3.5"
-release = "20170718"
+version = "3.7"
+release = "20200213"
 full_version = version + "." + release
diff --git a/arrayfire/__init__.py b/arrayfire/__init__.py
index 538085bb3..5cb009951 100644
--- a/arrayfire/__init__.py
+++ b/arrayfire/__init__.py
@@ -74,6 +74,7 @@
 from .timer      import *
 from .random     import *
 from .sparse     import *
+from .ml         import *
 
 # do not export default modules as part of arrayfire
 del ct
diff --git a/arrayfire/algorithm.py b/arrayfire/algorithm.py
index 36b5d79b8..d5adbcce5 100644
--- a/arrayfire/algorithm.py
+++ b/arrayfire/algorithm.py
@@ -44,6 +44,31 @@ def _nan_reduce_all(a, c_func, nan_val):
     imag = imag.value
     return real if imag == 0 else real + imag * 1j
 
+def _FNSD(dim, dims):
+    """Return `dim` if non-negative, else the index of the first extent > 1 (0 if none)."""
+    if dim >= 0:
+        return int(dim)
+
+    # A negative `dim` means "use the first non-singleton dimension".
+    for axis, extent in enumerate(dims):
+        if extent > 1:
+            return int(axis)
+    return 0
+
+def _rbk_dim(keys, vals, dim, c_func):
+    # Reduce `vals` by `keys` through `c_func`; a negative `dim` is resolved by _FNSD.
+    out_keys, out_vals = Array(), Array()
+    axis = c_int_t(_FNSD(dim, vals.dims()))
+    safe_call(c_func(c_pointer(out_keys.arr), c_pointer(out_vals.arr), keys.arr, vals.arr, axis))
+    return out_keys, out_vals
+
+def _nan_rbk_dim(keys, vals, dim, c_func, nan_val):
+    # By-key reduction through `c_func`, forwarding `nan_val` (as a C double) as the NaN replacement.
+    keys_out, vals_out = Array(), Array()
+    rdim = _FNSD(dim, vals.dims())
+    safe_call(c_func(c_pointer(keys_out.arr), c_pointer(vals_out.arr), keys.arr, vals.arr, c_int_t(rdim), c_double_t(nan_val)))
+    return keys_out, vals_out
+
 def sum(a, dim=None, nan_val=None):
     """
     Calculate the sum of all the elements along a specified dimension.
@@ -74,6 +99,34 @@ def sum(a, dim=None, nan_val=None):
         else:
             return _reduce_all(a, backend.get().af_sum_all)
 
+
+def sumByKey(keys, vals, dim=-1, nan_val=None):
+    """
+    Sum the elements of `vals` that share a key, along one dimension.
+
+    Parameters
+    ----------
+    keys  : af.Array
+         One dimensional arrayfire array with reduction keys.
+    vals  : af.Array
+         Multi dimensional arrayfire array that will be reduced.
+    dim: optional: int. default: -1
+         Dimension to reduce; a negative value picks the first dimension of size > 1.
+    nan_val: optional: scalar. default: None
+         The value that replaces NaN in the array
+
+    Returns
+    -------
+    keys: af.Array
+         The reduced keys.
+    values: af.Array
+         The per-key sums of `vals` along dimension `dim`.
+    """
+    if nan_val is None:
+        return _rbk_dim(keys, vals, dim, backend.get().af_sum_by_key)
+
+    return _nan_rbk_dim(keys, vals, dim, backend.get().af_sum_by_key_nan, nan_val)
+
 def product(a, dim=None, nan_val=None):
     """
     Calculate the product of all the elements along a specified dimension.
@@ -104,6 +157,33 @@ def product(a, dim=None, nan_val=None):
         else:
             return _reduce_all(a, backend.get().af_product_all)
 
+def productByKey(keys, vals, dim=-1, nan_val=None):
+    """
+    Multiply the elements of `vals` that share a key, along one dimension.
+
+    Parameters
+    ----------
+    keys  : af.Array
+         One dimensional arrayfire array with reduction keys.
+    vals  : af.Array
+         Multi dimensional arrayfire array that will be reduced.
+    dim: optional: int. default: -1
+         Dimension to reduce; a negative value picks the first dimension of size > 1.
+    nan_val: optional: scalar. default: None
+         The value that replaces NaN in the array
+
+    Returns
+    -------
+    keys: af.Array
+         The reduced keys.
+    values: af.Array
+         The per-key products of `vals` along dimension `dim`.
+    """
+    if nan_val is None:
+        return _rbk_dim(keys, vals, dim, backend.get().af_product_by_key)
+
+    return _nan_rbk_dim(keys, vals, dim, backend.get().af_product_by_key_nan, nan_val)
+
 def min(a, dim=None):
     """
     Find the minimum value of all the elements along a specified dimension.
@@ -126,6 +206,28 @@ def min(a, dim=None):
     else:
         return _reduce_all(a, backend.get().af_min_all)
 
+def minByKey(keys, vals, dim=-1):
+    """
+    Calculate the min of elements along a specified dimension according to a key.
+
+    Parameters
+    ----------
+    keys  : af.Array
+         One dimensional arrayfire array with reduction keys.
+    vals  : af.Array
+         Multi dimensional arrayfire array that will be reduced.
+    dim: optional: int. default: -1
+         Dimension along which the min will occur; negative picks the first dimension of size > 1.
+
+    Returns
+    -------
+    keys: af.Array
+         The reduced keys of all elements in `vals` along dimension `dim`.
+    values: af.Array
+         The min of all elements in `vals` along dimension `dim` according to keys
+    """
+    return _rbk_dim(keys, vals, dim, backend.get().af_min_by_key)
+
 def max(a, dim=None):
     """
     Find the maximum value of all the elements along a specified dimension.
@@ -148,6 +250,28 @@ def max(a, dim=None):
     else:
         return _reduce_all(a, backend.get().af_max_all)
 
+def maxByKey(keys, vals, dim=-1):
+    """
+    Calculate the max of elements along a specified dimension according to a key.
+
+    Parameters
+    ----------
+    keys  : af.Array
+         One dimensional arrayfire array with reduction keys.
+    vals  : af.Array
+         Multi dimensional arrayfire array that will be reduced.
+    dim: optional: int. default: -1
+         Dimension along which the max will occur; negative picks the first dimension of size > 1.
+
+    Returns
+    -------
+    keys: af.Array
+         The reduced keys of all elements in `vals` along dimension `dim`.
+    values: af.Array
+         The max of all elements in `vals` along dimension `dim` according to keys.
+    """
+    return _rbk_dim(keys, vals, dim, backend.get().af_max_by_key)
+
 def all_true(a, dim=None):
     """
     Check if all the elements along a specified dimension are true.
@@ -170,6 +294,28 @@ def all_true(a, dim=None):
     else:
         return _reduce_all(a, backend.get().af_all_true_all)
 
+def allTrueByKey(keys, vals, dim=-1):
+    """
+    Calculate if all elements are true along a specified dimension according to a key.
+
+    Parameters
+    ----------
+    keys  : af.Array
+         One dimensional arrayfire array with reduction keys.
+    vals  : af.Array
+         Multi dimensional arrayfire array that will be reduced.
+    dim: optional: int. default: -1
+         Dimension along which the all true check will occur; negative picks the first dimension of size > 1.
+
+    Returns
+    -------
+    keys: af.Array
+         The reduced keys of all true check in `vals` along dimension `dim`.
+    values: af.Array
+         Booleans denoting if all elements are true in `vals` along dimension `dim` according to keys
+    """
+    return _rbk_dim(keys, vals, dim, backend.get().af_all_true_by_key)
+
 def any_true(a, dim=None):
     """
     Check if any the elements along a specified dimension are true.
@@ -192,6 +338,28 @@ def any_true(a, dim=None):
     else:
         return _reduce_all(a, backend.get().af_any_true_all)
 
+def anyTrueByKey(keys, vals, dim=-1):
+    """
+    Calculate if any elements are true along a specified dimension according to a key.
+
+    Parameters
+    ----------
+    keys  : af.Array
+         One dimensional arrayfire array with reduction keys.
+    vals  : af.Array
+         Multi dimensional arrayfire array that will be reduced.
+    dim: optional: int. default: -1
+         Dimension along which the any true check will occur; negative picks the first dimension of size > 1.
+
+    Returns
+    -------
+    keys: af.Array
+         The reduced keys of any true check in `vals` along dimension `dim`.
+    values: af.Array
+         Booleans denoting if any elements are true in `vals` along dimension `dim` according to keys.
+    """
+    return _rbk_dim(keys, vals, dim, backend.get().af_any_true_by_key)
+
 def count(a, dim=None):
     """
     Count the number of non zero elements in an array along a specified dimension.
@@ -214,6 +382,28 @@ def count(a, dim=None):
     else:
         return _reduce_all(a, backend.get().af_count_all)
 
+def countByKey(keys, vals, dim=-1):
+    """
+    Counts non-zero elements along a specified dimension according to a key.
+
+    Parameters
+    ----------
+    keys  : af.Array
+         One dimensional arrayfire array with reduction keys.
+    vals  : af.Array
+         Multi dimensional arrayfire array that will be reduced.
+    dim: optional: int. default: -1
+         Dimension along which to count elements; negative picks the first dimension of size > 1.
+
+    Returns
+    -------
+    keys: af.Array
+         The reduced keys of count in `vals` along dimension `dim`.
+    values: af.Array
+         Count of non-zero elements in `vals` along dimension `dim` according to keys.
+    """
+    return _rbk_dim(keys, vals, dim, backend.get().af_count_by_key)
+
 def imin(a, dim=None):
     """
     Find the value and location of the minimum value along a specified dimension
diff --git a/arrayfire/arith.py b/arrayfire/arith.py
index b0d945f05..e4dc2fdfd 100644
--- a/arrayfire/arith.py
+++ b/arrayfire/arith.py
@@ -958,6 +958,26 @@ def sqrt(a):
     """
     return _arith_unary_func(a, backend.get().af_sqrt)
 
+def rsqrt(a):
+    """
+    Reciprocal (inverse) square root of each element in the array.
+
+    Parameters
+    ----------
+    a : af.Array
+        Multi dimensional arrayfire array.
+
+    Returns
+    --------
+    out : af.Array
+         array containing the inverse square root of each value from `a`.
+
+    Note
+    -------
+    `a` must not be complex.
+    """
+    return _arith_unary_func(a, backend.get().af_rsqrt)
+
 def cbrt(a):
     """
     Cube root of each element in the array.
diff --git a/arrayfire/array.py b/arrayfire/array.py
index 801cd502b..289ef4699 100644
--- a/arrayfire/array.py
+++ b/arrayfire/array.py
@@ -783,6 +783,14 @@ def is_single(self):
         safe_call(backend.get().af_is_single(c_pointer(res), self.arr))
         return res.value
 
+    def is_half(self):
+        """
+        Return True when the array holds half precision floating point (fp16) values.
+        """
+        flag = c_bool_t(False)
+        safe_call(backend.get().af_is_half(c_pointer(flag), self.arr))
+        return flag.value
+
     def is_real_floating(self):
         """
         Check if the array is real and of floating point type.
diff --git a/arrayfire/data.py b/arrayfire/data.py
index d9ad19e48..1fbe17a53 100644
--- a/arrayfire/data.py
+++ b/arrayfire/data.py
@@ -799,6 +799,58 @@ def replace(lhs, cond, rhs):
     else:
         safe_call(backend.get().af_replace_scalar(lhs.arr, cond.arr, c_double_t(rhs)))
 
+def pad(a, beginPadding, endPadding, padFillType = PAD.ZERO):
+    """
+    Pad an array
+
+    This function will pad an array with the specified border size.
+    Newly padded values can be filled in several different ways.
+
+    Parameters
+    ----------
+
+    a: af.Array
+          A multi dimensional input arrayfire array.
+
+    beginPadding: tuple of up to 4 ints; missing trailing entries are treated as 0.
+
+    endPadding: tuple of up to 4 ints; missing trailing entries are treated as 0.
+
+    padFillType: optional af.PAD default: af.PAD.ZERO
+        specifies type of values to fill padded border with
+
+    Returns
+    -------
+    output: af.Array
+           A padded array
+
+    Examples
+    ---------
+    >>> import arrayfire as af
+    >>> a = af.randu(3,3)
+    >>> af.display(a)
+    [3 3 1 1]
+        0.4107     0.1794     0.3775
+        0.8224     0.4198     0.3027
+        0.9518     0.0081     0.6456
+
+    >>> padded = af.pad(a, (1, 1), (1, 1), af.PAD.ZERO)
+    >>> af.display(padded)
+    [5 5 1 1]
+        0.0000     0.0000     0.0000     0.0000     0.0000
+        0.0000     0.4107     0.1794     0.3775     0.0000
+        0.0000     0.8224     0.4198     0.3027     0.0000
+        0.0000     0.9518     0.0081     0.6456     0.0000
+        0.0000     0.0000     0.0000     0.0000     0.0000
+    """
+    out = Array()
+    # Zero-extend to four entries so short padding specs such as (1, 1) are accepted.
+    begin_dims = dim4(*(tuple(beginPadding) + (0, 0, 0, 0))[:4])
+    end_dims   = dim4(*(tuple(endPadding) + (0, 0, 0, 0))[:4])
+    safe_call(backend.get().af_pad(c_pointer(out.arr), a.arr, 4, c_pointer(begin_dims), 4, c_pointer(end_dims), padFillType.value))
+    return out
+
+
 def lookup(a, idx, dim=0):
     """
     Lookup the values of input array based on index.
diff --git a/arrayfire/device.py b/arrayfire/device.py
index 84594f2b3..53f302db5 100644
--- a/arrayfire/device.py
+++ b/arrayfire/device.py
@@ -150,6 +150,25 @@ def is_dbl_supported(device=None):
     safe_call(backend.get().af_get_dbl_support(c_pointer(res), dev))
     return res.value
 
+def is_half_supported(device=None):
+    """
+    Report whether a device can work with half precision (fp16) data.
+
+    Parameters
+    -----------
+    device: optional: int. default: None.
+         id of the desired device. When None, the current device is queried.
+
+    Returns
+    --------
+        - True if half precision supported.
+        - False if half precision not supported.
+    """
+    supported = c_bool_t(False)
+    target = get_device() if device is None else device
+    safe_call(backend.get().af_get_half_support(c_pointer(supported), target))
+    return supported.value
+
 def sync(device=None):
     """
     Block until all the functions on the device have completed execution.
diff --git a/arrayfire/graphics.py b/arrayfire/graphics.py
index 4e378aaf5..70881f42c 100644
--- a/arrayfire/graphics.py
+++ b/arrayfire/graphics.py
@@ -496,6 +496,34 @@ def set_axes_limits(self, xmin, xmax, ymin, ymax, zmin=None, zmax=None, exact=Fa
                                                           c_float_t(zmin), c_float_t(zmax),
                                                           exact, c_pointer(_cell)))
 
+    def set_axes_label_format(self, xformat="%4.1f", yformat="%4.1f", zformat="%4.1f"):
+        """
+        Set the printf-style format used for the axis labels.
+
+        Parameters
+        ----------
+        xformat : str.
+            default: "%4.1f".
+              is a printf-style format specifier for x-axis
+        yformat : str.
+            default: "%4.1f".
+              is a printf-style format specifier for y-axis
+        zformat : str.
+            default: "%4.1f".
+              is a printf-style format specifier for z-axis
+
+        """
+        _cell = _Cell(self._r, self._c, None, self._cmap)
+        # The C call receives char pointers, so each format is encoded to ASCII bytes first.
+        xformat = xformat.encode("ascii")
+        yformat = yformat.encode("ascii")
+        zformat = zformat.encode("ascii")
+        safe_call(backend.get().af_set_axes_label_format(self._wnd,
+                                                         c_char_ptr_t(xformat),
+                                                         c_char_ptr_t(yformat),
+                                                         c_char_ptr_t(zformat),
+                                                         c_pointer(_cell)))
+
     def __getitem__(self, keys):
         """
         Get access to a specific grid location within the window.
diff --git a/arrayfire/image.py b/arrayfire/image.py
index 92c8d088b..2fdce0aef 100644
--- a/arrayfire/image.py
+++ b/arrayfire/image.py
@@ -711,6 +711,49 @@ def regions(image, conn = CONNECTIVITY.FOUR, out_type = Dtype.f32):
                                        conn.value, out_type.value))
     return output
 
+def confidenceCC(image, seedx, seedy, radius, multiplier, iters, segmented_value):
+    """
+    Find the confidence connected components in the image, starting from seed points.
+
+    Parameters
+    ----------
+    image : af.Array
+          - A 2 D arrayfire array representing an image.
+            Expects non-integral type
+
+    seedx : af.Array
+          - An array with x-coordinates of seed points
+
+    seedy : af.Array
+          - An array with y-coordinates of seed points
+
+    radius : scalar
+          - The neighborhood region to be considered around
+            each seed point (passed as an unsigned int)
+
+    multiplier : scalar
+          - Controls the threshold range computed from the mean and
+            variance of seed point neighborhoods (passed as an unsigned int)
+
+    iters : scalar
+          - Number of iterations (passed as an int)
+
+    segmented_value : scalar
+          - The value to which valid pixels of the output
+            array are set (passed as a double)
+
+    Returns
+    ---------
+
+    output : af.Array
+           - Output array with resulting connected components
+
+    """
+    segmented = Array()
+    safe_call(backend.get().af_confidence_cc(c_pointer(segmented.arr), image.arr, seedx.arr, seedy.arr,
+                c_uint_t(radius), c_uint_t(multiplier), c_int_t(iters), c_double_t(segmented_value)))
+    return segmented
+
 def sobel_derivatives(image, w_len=3):
     """
     Find the sobel derivatives of the image.
diff --git a/arrayfire/lapack.py b/arrayfire/lapack.py
index e6ffd5cb7..97ad92c7a 100644
--- a/arrayfire/lapack.py
+++ b/arrayfire/lapack.py
@@ -264,6 +264,39 @@ def inverse(A, options=MATPROP.NONE):
     safe_call(backend.get().af_inverse(c_pointer(AI.arr), A.arr, options.value))
     return AI
 
+def pinverse(A, tol=1E-6, options=MATPROP.NONE):
+    """
+    Compute the Moore-Penrose pseudo-inverse of a matrix.
+
+    Parameters
+    ----------
+
+    A: af.Array
+       - A 2 dimensional arrayfire input matrix array
+
+    tol: optional: scalar. default: 1E-6.
+       - Tolerance for calculating rank (passed as a double)
+
+    options: optional: af.MATPROP. default: af.MATPROP.NONE.
+       - Currently needs to be `af.MATPROP.NONE`.
+       - Additional options may speed up computation in the future
+
+    Returns
+    -------
+
+    AI: af.Array
+       - A 2 dimensional array that is the pseudo-inverse of `A`
+
+    Note
+    ----
+
+    This function is not supported in GFOR
+
+    """
+    pseudo_inv = Array()
+    safe_call(backend.get().af_pinverse(c_pointer(pseudo_inv.arr), A.arr, c_double_t(tol), options.value))
+    return pseudo_inv
+
 def rank(A, tol=1E-5):
     """
     Rank of a matrix.
diff --git a/arrayfire/library.py b/arrayfire/library.py
index 0bdc2eeb9..915ca950a 100644
--- a/arrayfire/library.py
+++ b/arrayfire/library.py
@@ -97,6 +97,7 @@ class ERR(_Enum):
     # 400-499 Errors for missing hardware features
     NO_DBL         = _Enum_Type(401)
     NO_GFX         = _Enum_Type(402)
+    NO_HALF        = _Enum_Type(403)
 
     # 500-599 Errors specific to the heterogeneous API
     LOAD_LIB       = _Enum_Type(501)
@@ -123,6 +124,7 @@ class Dtype(_Enum):
     u64 = _Enum_Type(9)
     s16 = _Enum_Type(10)
     u16 = _Enum_Type(11)
+    f16 = _Enum_Type(12)
 
 class Source(_Enum):
     """
@@ -152,6 +154,8 @@ class PAD(_Enum):
     """
     ZERO = _Enum_Type(0)
     SYM  = _Enum_Type(1)
+    CLAMP_TO_EDGE  = _Enum_Type(2)
+    PERIODIC = _Enum_Type(3)
 
 class CONNECTIVITY(_Enum):
     """
@@ -175,6 +179,15 @@ class CONV_DOMAIN(_Enum):
     SPATIAL = _Enum_Type(1)
     FREQ    = _Enum_Type(2)
 
+class CONV_GRADIENT(_Enum):
+    """
+    Convolution gradient type (the `gradType` argument of `convolve2GradientNN`)
+    """
+    DEFAULT = _Enum_Type(0)
+    FILTER  = _Enum_Type(1)
+    DATA    = _Enum_Type(2)
+    BIAS    = _Enum_Type(3)
+
 class MATCH(_Enum):
     """
     Match type
@@ -446,6 +459,14 @@ class TOPK(_Enum):
     MIN     = _Enum_Type(1)
     MAX     = _Enum_Type(2)
 
+class VARIANCE(_Enum):
+    """
+    Variance bias type (the `bias` argument of `meanvar`)
+    """
+    DEFAULT    = _Enum_Type(0)
+    SAMPLE     = _Enum_Type(1)
+    POPULATION = _Enum_Type(2)
+
 _VER_MAJOR_PLACEHOLDER = "__VER_MAJOR__"
 
 def _setup():
diff --git a/arrayfire/ml.py b/arrayfire/ml.py
new file mode 100644
index 000000000..9140cc278
--- /dev/null
+++ b/arrayfire/ml.py
@@ -0,0 +1,74 @@
+#######################################################
+# Copyright (c) 2020, ArrayFire
+# All rights reserved.
+#
+# This file is distributed under 3-clause BSD license.
+# The complete license agreement can be obtained at:
+# http://arrayfire.com/licenses/BSD-3-Clause
+########################################################
+
+"""
+Machine learning functions
+    - Pool 2D, ND, maxpooling, minpooling, meanpooling
+    - Forward and backward convolution passes
+"""
+
+from .library import *
+from .array import *
+
+def convolve2GradientNN(incoming_gradient, original_signal, original_kernel, convolved_output, stride = (1, 1), padding = (0, 0), dilation = (1, 1), gradType = CONV_GRADIENT.DEFAULT):
+    """
+    Compute a gradient of the 2D machine-learning style convolution
+    (see `convolve2NN`) with respect to the quantity selected by `gradType`.
+    The forward pass spatially convolves a filter on 2-dimensions against a
+    signal; multiple signals and filters can be batched against each other.
+
+    Example (forward pass):
+        Signals with dimensions: d0 x d1 x d2 x Ns
+        Filters with dimensions: d0 x d1 x d2 x Nf
+
+        Resulting Convolution:   d0 x d1 x Nf x Ns
+
+    Parameters
+    -----------
+
+    incoming_gradient: af.Array
+            - Gradient handed to the backward pass
+              (presumably shaped like `convolved_output`; confirm with the C API docs).
+
+    original_signal, original_kernel: af.Array
+            - Signal and kernel arrays forwarded to the C call (presumably the forward-pass inputs).
+
+    convolved_output: af.Array
+            - Presumably the result of the forward convolution of the two arrays above.
+
+    stride, padding, dilation: tuple of ints. defaults: (1, 1), (0, 0), (1, 1).
+            - Only the first two entries of each tuple are used.
+
+    gradType: optional: af.CONV_GRADIENT. default: af.CONV_GRADIENT.DEFAULT.
+            - Selects which gradient is returned (FILTER, DATA or BIAS).
+
+    Returns
+    --------
+
+    output: af.Array
+          - Gradient wrt/requested gradient type
+
+    """
+    output = Array()
+    stride_dim   = dim4(stride[0],   stride[1])
+    padding_dim  = dim4(padding[0],  padding[1])
+    dilation_dim = dim4(dilation[0], dilation[1])
+
+    safe_call(backend.get().af_convolve2_gradient_nn(
+                                            c_pointer(output.arr),
+                                            incoming_gradient.arr,
+                                            original_signal.arr,
+                                            original_kernel.arr,
+                                            convolved_output.arr,
+                                            2, c_pointer(stride_dim),
+                                            2, c_pointer(padding_dim),
+                                            2, c_pointer(dilation_dim),
+                                            gradType.value))
+    return output
+
diff --git a/arrayfire/signal.py b/arrayfire/signal.py
index ca27d21c2..1fa6c424d 100644
--- a/arrayfire/signal.py
+++ b/arrayfire/signal.py
@@ -972,6 +972,56 @@ def convolve2(signal, kernel, conv_mode = CONV_MODE.DEFAULT, conv_domain = CONV_
                                          conv_mode.value, conv_domain.value))
     return output
 
+def convolve2NN(signal, kernel, stride = (1, 1), padding = (0, 0), dilation = (1, 1)):
+    """
+    Machine-learning style 2D convolution.
+    A filter is spatially convolved on 2-dimensions against a signal.
+    Multiple signals and filters can be batched against each other.
+    The signals and filters can be multi-dimensional, however their
+    dimensions must match.
+
+    Example:
+        Signals with dimensions: d0 x d1 x d2 x Ns
+        Filters with dimensions: d0 x d1 x d2 x Nf
+
+        Resulting Convolution:   d0 x d1 x Nf x Ns
+
+    Parameters
+    -----------
+
+    signal: af.Array
+            - A 2 dimensional signal or batch of 2 dimensional signals.
+
+    kernel: af.Array
+            - A 2 dimensional kernel or batch of 2 dimensional kernels.
+
+    stride: tuple of ints. default: (1, 1).
+            - Specifies how much to stride along each dimension
+
+    padding: tuple of ints. default: (0, 0).
+            - Specifies signal padding along each dimension
+
+    dilation: tuple of ints. default: (1, 1).
+            - Specifies how much to dilate kernel along each dimension before convolution
+
+    Returns
+    --------
+
+    output: af.Array
+          - Convolved 2D array.
+
+    """
+    convolved = Array()
+    # Only the first two entries of each tuple are forwarded to the C call.
+    strides, pads, dilations = (dim4(t[0], t[1]) for t in (stride, padding, dilation))
+
+    safe_call(backend.get().af_convolve2_nn(
+        c_pointer(convolved.arr), signal.arr, kernel.arr,
+        2, c_pointer(strides),
+        2, c_pointer(pads),
+        2, c_pointer(dilations)))
+    return convolved
+
 def convolve2_separable(col_kernel, row_kernel, signal, conv_mode = CONV_MODE.DEFAULT):
     """
     Convolution: 2D separable convolution
diff --git a/arrayfire/statistics.py b/arrayfire/statistics.py
index e6b4effd0..f47f3a48d 100644
--- a/arrayfire/statistics.py
+++ b/arrayfire/statistics.py
@@ -108,6 +108,48 @@ def var(a, isbiased=False, weights=None, dim=None):
 
         return real if imag == 0 else real + imag * 1j
 
+def meanvar(a, weights=None, bias=VARIANCE.DEFAULT, dim=-1):
+    """
+    Compute the mean and the variance along a dimension in a single call.
+
+    Parameters
+    ----------
+    a: af.Array
+        The input array.
+
+    weights: optional: af.Array. default: None.
+        Array to calculate for the weighted mean. Must match size of
+        the input array. When None, an empty array is passed instead.
+
+    bias: optional: af.VARIANCE. default: DEFAULT.
+        population variance(VARIANCE.POPULATION) or
+        sample variance(VARIANCE.SAMPLE).
+
+    dim: optional: int. default: -1.
+        The dimension for which to obtain the variance from input data.
+
+    Returns
+    -------
+    mean: af.Array
+        Array containing the mean of the input array along a given
+        dimension.
+    variance: af.Array
+        Array containing the variance of the input array along a given
+        dimension.
+    """
+
+    # An empty Array stands in for "no weights".
+    if weights is None:
+        weights = Array()
+
+    out_mean = Array()
+    out_var  = Array()
+    safe_call(backend.get().af_meanvar(c_pointer(out_mean.arr), c_pointer(out_var.arr),
+                                       a.arr, weights.arr, bias.value, c_int_t(dim)))
+
+    return out_mean, out_var
+
+
 def stdev(a, dim=None):
     """
     Calculate standard deviation along a given dimension.
diff --git a/arrayfire/util.py b/arrayfire/util.py
index 709bd7811..44af6000d 100644
--- a/arrayfire/util.py
+++ b/arrayfire/util.py
@@ -105,7 +105,8 @@ def get_reversion():
             'l' : Dtype.s64,
             'L' : Dtype.u64,
             'F' : Dtype.c32,
-            'D' : Dtype.c64}
+            'D' : Dtype.c64,
+            'hf': Dtype.f16}
 
 to_typecode = {Dtype.f32.value : 'f',
                Dtype.f64.value : 'd',
@@ -118,7 +119,8 @@ def get_reversion():
                Dtype.s64.value : 'l',
                Dtype.u64.value : 'L',
                Dtype.c32.value : 'F',
-               Dtype.c64.value : 'D'}
+               Dtype.c64.value : 'D',
+               Dtype.f16.value : 'hf'}
 
 to_c_type = {Dtype.f32.value : c_float_t,
              Dtype.f64.value : c_double_t,
@@ -131,7 +133,8 @@ def get_reversion():
              Dtype.s64.value : c_longlong_t,
              Dtype.u64.value : c_ulonglong_t,
              Dtype.c32.value : c_float_t * 2,
-             Dtype.c64.value : c_double_t * 2}
+             Dtype.c64.value : c_double_t * 2,
+             Dtype.f16.value : c_ushort_t}
 
 to_typename = {Dtype.f32.value : 'float',
                Dtype.f64.value : 'double',
@@ -144,4 +147,5 @@ def get_reversion():
                Dtype.s64.value : 'long int',
                Dtype.u64.value : 'unsigned long int',
                Dtype.c32.value : 'float complex',
-               Dtype.c64.value : 'double complex'}
+               Dtype.c64.value : 'double complex',
+               Dtype.f16.value : 'half'}
diff --git a/setup.cfg b/setup.cfg
index 0e02078f1..e4f536a88 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = arrayfire
-version = 3.6.20181017
+version = 3.7.20200213
 description = Python bindings for ArrayFire
 licence = BSD
 long_description = file: README.md
diff --git a/tests/simple/algorithm.py b/tests/simple/algorithm.py
index 5b40d6916..b9e42f138 100644
--- a/tests/simple/algorithm.py
+++ b/tests/simple/algorithm.py
@@ -27,6 +27,30 @@ def simple_algorithm(verbose=False):
     display_func(af.sum(a, 0))
     display_func(af.sum(a, 1))
 
+    rk = af.constant(1, 3, dtype=af.Dtype.u32)
+    rk[2] = 0
+    af.eval(rk)
+    display_func(af.sumByKey(rk, a, dim=0))
+    display_func(af.sumByKey(rk, a, dim=1))
+
+    display_func(af.productByKey(rk, a, dim=0))
+    display_func(af.productByKey(rk, a, dim=1))
+
+    display_func(af.minByKey(rk, a, dim=0))
+    display_func(af.minByKey(rk, a, dim=1))
+
+    display_func(af.maxByKey(rk, a, dim=0))
+    display_func(af.maxByKey(rk, a, dim=1))
+
+    display_func(af.anyTrueByKey(rk, a, dim=0))
+    display_func(af.anyTrueByKey(rk, a, dim=1))
+
+    display_func(af.allTrueByKey(rk, a, dim=0))
+    display_func(af.allTrueByKey(rk, a, dim=1))
+
+    display_func(af.countByKey(rk, a, dim=0))
+    display_func(af.countByKey(rk, a, dim=1))
+
     display_func(af.product(a, 0))
     display_func(af.product(a, 1))
 
diff --git a/tests/simple/arith.py b/tests/simple/arith.py
index 306b93fff..5d4d83d00 100644
--- a/tests/simple/arith.py
+++ b/tests/simple/arith.py
@@ -192,6 +192,7 @@ def simple_arith(verbose=False):
     display_func(af.log10(a))
     display_func(af.log2(a))
     display_func(af.sqrt(a))
+    display_func(af.rsqrt(a))
     display_func(af.cbrt(a))
 
     a = af.round(5 * af.randu(3, 3) - 1)
diff --git a/tests/simple/data.py b/tests/simple/data.py
index d80f9e125..d091497eb 100644
--- a/tests/simple/data.py
+++ b/tests/simple/data.py
@@ -72,5 +72,6 @@ def simple_data(verbose=False):
     af.replace(a, a > 0.3, -0.3)
     display_func(a)
 
+    display_func(af.pad(a, (1, 1, 0, 0), (2, 2, 0, 0)))
 
 _util.tests["data"] = simple_data
diff --git a/tests/simple/image.py b/tests/simple/image.py
index 1489e94dc..8c2212974 100644
--- a/tests/simple/image.py
+++ b/tests/simple/image.py
@@ -51,6 +51,9 @@ def simple_image(verbose=False):
     display_func(af.maxfilt(a))
 
     display_func(af.regions(af.round(a) > 3))
+    display_func(af.confidenceCC(af.randu(10, 10),
+        (af.randu(2) * 9).as_type(af.Dtype.u32), (af.randu(2) * 9).as_type(af.Dtype.u32), 3, 3, 10, 0.1))
+
 
     dx, dy = af.sobel_derivatives(a)
     display_func(dx)
diff --git a/tests/simple/lapack.py b/tests/simple/lapack.py
index e27fb6bc0..8cd3e9ac3 100644
--- a/tests/simple/lapack.py
+++ b/tests/simple/lapack.py
@@ -58,6 +58,9 @@ def simple_lapack(verbose=False):
     display_func(a)
     display_func(ai)
 
+    ai = af.pinverse(a)
+    display_func(ai)
+
     x0 = af.randu(5, 3)
     b = af.matmul(a, x0)
     x1 = af.solve(a, b)
diff --git a/tests/simple/signal.py b/tests/simple/signal.py
index d92526488..9e72e6e35 100644
--- a/tests/simple/signal.py
+++ b/tests/simple/signal.py
@@ -101,6 +101,11 @@ def simple_signal(verbose=False):
     display_func(af.convolve(a, b))
     display_func(af.fft_convolve(a, b))
 
+    c = af.convolve2NN(a, b)
+    display_func(c)
+    g = af.convolve2GradientNN(c, a, b, c, gradType=af.CONV_GRADIENT.DATA)  # c reused as a same-shaped incoming gradient
+    display_func(g)
+
     a = af.randu(5, 5, 3)
     b = af.randu(3, 3, 2)
     display_func(af.convolve3(a, b))
diff --git a/tests/simple/statistics.py b/tests/simple/statistics.py
index 2815af335..be639ea4a 100644
--- a/tests/simple/statistics.py
+++ b/tests/simple/statistics.py
@@ -34,6 +34,13 @@ def simple_statistics(verbose=False):
     print_func(af.var(a, isbiased=True))
     print_func(af.var(a, weights=w))
 
+    mean, var = af.meanvar(a, dim=0)
+    display_func(mean)
+    display_func(var)
+    mean, var = af.meanvar(a, weights=w, bias=af.VARIANCE.SAMPLE, dim=0)
+    display_func(mean)
+    display_func(var)
+
     display_func(af.stdev(a, dim=0))
     print_func(af.stdev(a))