diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index 3ff98b7b5a9b5..c061102fbaddc 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -355,19 +355,38 @@ cdef class {{name}}HashTable(HashTable):
 
         return np.asarray(locs)
 
-    def factorize(self, {{dtype}}_t values):
-        uniques = {{name}}Vector()
-        labels = self.get_labels(values, uniques, 0, 0)
-        return uniques.to_array(), labels
-
     @cython.boundscheck(False)
-    def get_labels(self, const {{dtype}}_t[:] values, {{name}}Vector uniques,
-                   Py_ssize_t count_prior, Py_ssize_t na_sentinel,
+    @cython.wraparound(False)
+    def _factorize(self, const {{dtype}}_t[:] values, {{name}}Vector uniques,
+                   Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1,
                    object na_value=None):
+        """
+        Calculate unique values and labels (no sorting); ignores all NA-values
+
+        Parameters
+        ----------
+        values : ndarray[{{dtype}}]
+            Array of values of which unique will be calculated
+        uniques : {{name}}Vector
+            Vector into which uniques will be written
+        count_prior : Py_ssize_t, default 0
+            Number of existing entries in uniques
+        na_sentinel : Py_ssize_t, default -1
+            Sentinel value used for all NA-values in inverse
+        na_value : object, default None
+            Value to identify as missing. If na_value is None, then
+            any value satisfying val!=val is considered missing.
+
+        Returns
+        -------
+        uniques : ndarray[{{dtype}}]
+            Unique values of input, not sorted
+        labels : ndarray[int64]
+            The labels from values to uniques
+        """
         cdef:
-            Py_ssize_t i, n = len(values)
+            Py_ssize_t i, idx, count = count_prior, n = len(values)
             int64_t[:] labels
-            Py_ssize_t idx, count = count_prior
             int ret = 0
             {{dtype}}_t val, na_value2
             khiter_t k
@@ -399,9 +418,11 @@ cdef class {{name}}HashTable(HashTable):
 
             k = kh_get_{{dtype}}(self.table, val)
             if k != self.table.n_buckets:
+                # k falls into a previous bucket
                 idx = self.table.vals[k]
                 labels[i] = idx
             else:
+                # k hasn't been seen yet
                 k = kh_put_{{dtype}}(self.table, val, &ret)
                 self.table.vals[k] = count
 
@@ -418,6 +439,19 @@ cdef class {{name}}HashTable(HashTable):
 
         return np.asarray(labels)
 
+    def factorize(self, const {{dtype}}_t[:] values, Py_ssize_t na_sentinel=-1,
+                  object na_value=None):
+        uniques = {{name}}Vector()
+        labels = self._factorize(values, uniques=uniques,
+                                 na_sentinel=na_sentinel, na_value=na_value)
+        return labels, uniques.to_array()
+
+    def get_labels(self, const {{dtype}}_t[:] values, {{name}}Vector uniques,
+                   Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1,
+                   object na_value=None):
+        return self._factorize(values, uniques, count_prior=count_prior,
+                               na_sentinel=na_sentinel, na_value=na_value)
+
     @cython.boundscheck(False)
     def get_labels_groupby(self, const {{dtype}}_t[:] values):
         cdef:
@@ -464,7 +498,21 @@ cdef class {{name}}HashTable(HashTable):
         return np.asarray(labels), arr_uniques
 
     @cython.boundscheck(False)
+    @cython.wraparound(False)
     def unique(self, const {{dtype}}_t[:] values):
+        """
+        Calculate unique values without sorting
+
+        Parameters
+        ----------
+        values : ndarray[{{dtype}}]
+            Array of values of which unique will be calculated
+
+        Returns
+        -------
+        uniques : ndarray[{{dtype}}]
+            Unique values of input, not sorted
+        """
         cdef:
             Py_ssize_t i, n = len(values)
             int ret = 0
@@ -567,7 +615,21 @@ cdef class StringHashTable(HashTable):
         return labels
 
     @cython.boundscheck(False)
+    @cython.wraparound(False)
     def unique(self, ndarray[object] values):
+        """
+        Calculate unique values without sorting
+
+        Parameters
+        ----------
+        values : ndarray[object]
+            Array of values of which unique will be calculated
+
+        Returns
+        -------
+        uniques : ndarray[object]
+            Unique values of input, not sorted
+        """
         cdef:
             Py_ssize_t i, count, n = len(values)
             int64_t[:] uindexer
@@ -602,11 +664,6 @@ cdef class StringHashTable(HashTable):
                 uniques.append(values[uindexer[i]])
         return uniques.to_array()
 
-    def factorize(self, ndarray[object] values):
-        uniques = ObjectVector()
-        labels = self.get_labels(values, uniques, 0, 0)
-        return uniques.to_array(), labels
-
     @cython.boundscheck(False)
     def lookup(self, ndarray[object] values):
         cdef:
@@ -669,14 +726,37 @@ cdef class StringHashTable(HashTable):
         free(vecs)
 
     @cython.boundscheck(False)
-    def get_labels(self, ndarray[object] values, ObjectVector uniques,
-                   Py_ssize_t count_prior, int64_t na_sentinel,
+    @cython.wraparound(False)
+    def _factorize(self, ndarray[object] values, ObjectVector uniques,
+                   Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1,
                    object na_value=None):
+        """
+        Calculate unique values and labels (no sorting); ignores all NA-values
+
+        Parameters
+        ----------
+        values : ndarray[object]
+            Array of values of which unique will be calculated
+        uniques : ObjectVector
+            Vector into which uniques will be written
+        count_prior : Py_ssize_t, default 0
+            Number of existing entries in uniques
+        na_sentinel : Py_ssize_t, default -1
+            Sentinel value used for all NA-values in inverse
+        na_value : object, default None
+            Value to identify as missing
+
+        Returns
+        -------
+        uniques : ndarray[object]
+            Unique values of input, not sorted
+        labels : ndarray[int64]
+            The labels from values to uniques
+        """
         cdef:
-            Py_ssize_t i, n = len(values)
+            Py_ssize_t i, idx, count = count_prior, n = len(values)
             int64_t[:] labels
             int64_t[:] uindexer
-            Py_ssize_t idx, count = count_prior
             int ret = 0
             object val
             const char *v
@@ -684,19 +764,17 @@ cdef class StringHashTable(HashTable):
             khiter_t k
             bint use_na_value
 
-        # these by-definition *must* be strings
         labels = np.zeros(n, dtype=np.int64)
         uindexer = np.empty(n, dtype=np.int64)
         use_na_value = na_value is not None
 
-        # pre-filter out missing
-        # and assign pointers
+        # assign pointers and pre-filter out missing
        vecs = <const char **>malloc(n * sizeof(char *))
         for i in range(n):
             val = values[i]
 
-            if ((PyUnicode_Check(val) or PyString_Check(val)) and
-                    not (use_na_value and val == na_value)):
+            if ((PyUnicode_Check(val) or PyString_Check(val))
+                    and not (use_na_value and val == na_value)):
                 v = util.get_c_string(val)
                 vecs[i] = v
             else:
@@ -711,9 +789,11 @@ cdef class StringHashTable(HashTable):
                     v = vecs[i]
                     k = kh_get_str(self.table, v)
                     if k != self.table.n_buckets:
+                        # k falls into a previous bucket
                         idx = self.table.vals[k]
                         labels[i] = idx
                     else:
+                        # k hasn't been seen yet
                         k = kh_put_str(self.table, v, &ret)
                         self.table.vals[k] = count
                         uindexer[count] = i
@@ -728,6 +808,19 @@ cdef class StringHashTable(HashTable):
 
         return np.asarray(labels)
 
+    def factorize(self, ndarray[object] values, Py_ssize_t na_sentinel=-1,
+                  object na_value=None):
+        uniques = ObjectVector()
+        labels = self._factorize(values, uniques=uniques,
+                                 na_sentinel=na_sentinel, na_value=na_value)
+        return labels, uniques.to_array()
+
+    def get_labels(self, ndarray[object] values, ObjectVector uniques,
+                   Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1,
+                   object na_value=None):
+        return self._factorize(values, uniques, count_prior=count_prior,
+                               na_sentinel=na_sentinel, na_value=na_value)
+
 
 cdef class PyObjectHashTable(HashTable):
 
@@ -814,7 +907,22 @@ cdef class PyObjectHashTable(HashTable):
 
         return np.asarray(locs)
 
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
     def unique(self, ndarray[object] values):
+        """
+        Calculate unique values without sorting
+
+        Parameters
+        ----------
+        values : ndarray[object]
+            Array of values of which unique will be calculated
+
+        Returns
+        -------
+        uniques : ndarray[object]
+            Unique values of input, not sorted
+        """
         cdef:
             Py_ssize_t i, n = len(values)
             int ret = 0
@@ -832,13 +940,38 @@ cdef class PyObjectHashTable(HashTable):
 
         return uniques.to_array()
 
-    def get_labels(self, ndarray[object] values, ObjectVector uniques,
-                   Py_ssize_t count_prior, int64_t na_sentinel,
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    def _factorize(self, ndarray[object] values, ObjectVector uniques,
+                   Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1,
                    object na_value=None):
+        """
+        Calculate unique values and labels (no sorting); ignores all NA-values
+
+        Parameters
+        ----------
+        values : ndarray[object]
+            Array of values of which unique will be calculated
+        uniques : ObjectVector
+            Vector into which uniques will be written
+        count_prior : Py_ssize_t, default 0
+            Number of existing entries in uniques
+        na_sentinel : Py_ssize_t, default -1
+            Sentinel value used for all NA-values in inverse
+        na_value : object, default None
+            Value to identify as missing. If na_value is None, then None _plus_
+            any value satisfying val!=val is considered missing.
+
+        Returns
+        -------
+        uniques : ndarray[object]
+            Unique values of input, not sorted
+        labels : ndarray[int64]
+            The labels from values to uniques
+        """
         cdef:
-            Py_ssize_t i, n = len(values)
+            Py_ssize_t i, idx, count = count_prior, n = len(values)
             int64_t[:] labels
-            Py_ssize_t idx, count = count_prior
             int ret = 0
             object val
             khiter_t k
@@ -851,16 +984,18 @@ cdef class PyObjectHashTable(HashTable):
             val = values[i]
             hash(val)
 
-            if ((val != val or val is None) or
-                    (use_na_value and val == na_value)):
+            if ((val != val or val is None)
+                    or (use_na_value and val == na_value)):
                 labels[i] = na_sentinel
                 continue
 
             k = kh_get_pymap(self.table, val)
             if k != self.table.n_buckets:
+                # k falls into a previous bucket
                 idx = self.table.vals[k]
                 labels[i] = idx
             else:
+                # k hasn't been seen yet
                 k = kh_put_pymap(self.table, val, &ret)
                 self.table.vals[k] = count
                 uniques.append(val)
@@ -868,3 +1003,16 @@ cdef class PyObjectHashTable(HashTable):
                 count += 1
 
         return np.asarray(labels)
+
+    def factorize(self, ndarray[object] values, Py_ssize_t na_sentinel=-1,
+                  object na_value=None):
+        uniques = ObjectVector()
+        labels = self._factorize(values, uniques=uniques,
+                                 na_sentinel=na_sentinel, na_value=na_value)
+        return labels, uniques.to_array()
+
+    def get_labels(self, ndarray[object] values, ObjectVector uniques,
+                   Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1,
+                   object na_value=None):
+        return self._factorize(values, uniques, count_prior=count_prior,
+                               na_sentinel=na_sentinel, na_value=na_value)
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index cb9ffc4bd0fd5..0f1eb12883fd5 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -467,15 +467,13 @@ def _factorize_array(values, na_sentinel=-1, size_hint=None,
     -------
     labels, uniques : ndarray
     """
-    (hash_klass, vec_klass), values = _get_data_algo(values, _hashtables)
+    (hash_klass, _), values = _get_data_algo(values, _hashtables)
     table = hash_klass(size_hint or len(values))
-    uniques = vec_klass()
-    labels = table.get_labels(values, uniques, 0, na_sentinel,
-                              na_value=na_value)
+    labels, uniques = table.factorize(values, na_sentinel=na_sentinel,
+                                      na_value=na_value)
 
     labels = ensure_platform_int(labels)
-    uniques = uniques.to_array()
     return labels, uniques
 
 
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index 1fd801c68fdde..557669260604a 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -15,7 +15,6 @@
 from pandas import compat
 from pandas._libs import (groupby as libgroupby, algos as libalgos,
                           hashtable as ht)
-from pandas._libs.hashtable import unique_label_indices
 from pandas.compat import lrange, range
 import pandas.core.algorithms as algos
 import pandas.core.common as com
@@ -228,19 +227,53 @@ def test_complex_sorting(self):
 
         pytest.raises(TypeError, algos.factorize, x17[::-1], sort=True)
 
+    def test_float64_factorize(self, writable):
+        data = np.array([1.0, 1e8, 1.0, 1e-8, 1e8, 1.0], dtype=np.float64)
+        data.setflags(write=writable)
+        exp_labels = np.array([0, 1, 0, 2, 1, 0], dtype=np.intp)
+        exp_uniques = np.array([1.0, 1e8, 1e-8], dtype=np.float64)
+
+        labels, uniques = algos.factorize(data)
+        tm.assert_numpy_array_equal(labels, exp_labels)
+        tm.assert_numpy_array_equal(uniques, exp_uniques)
+
     def test_uint64_factorize(self, writable):
-        data = np.array([2**63, 1, 2**63], dtype=np.uint64)
+        data = np.array([2**64 - 1, 1, 2**64 - 1], dtype=np.uint64)
         data.setflags(write=writable)
         exp_labels = np.array([0, 1, 0], dtype=np.intp)
-        exp_uniques = np.array([2**63, 1], dtype=np.uint64)
+        exp_uniques = np.array([2**64 - 1, 1], dtype=np.uint64)
 
         labels, uniques = algos.factorize(data)
         tm.assert_numpy_array_equal(labels, exp_labels)
         tm.assert_numpy_array_equal(uniques, exp_uniques)
 
-        data = np.array([2**63, -1, 2**63], dtype=object)
+    def test_int64_factorize(self, writable):
+        data = np.array([2**63 - 1, -2**63, 2**63 - 1], dtype=np.int64)
+        data.setflags(write=writable)
         exp_labels = np.array([0, 1, 0], dtype=np.intp)
-        exp_uniques = np.array([2**63, -1], dtype=object)
+        exp_uniques = np.array([2**63 - 1, -2**63], dtype=np.int64)
+
+        labels, uniques = algos.factorize(data)
+        tm.assert_numpy_array_equal(labels, exp_labels)
+        tm.assert_numpy_array_equal(uniques, exp_uniques)
+
+    def test_string_factorize(self, writable):
+        data = np.array(['a', 'c', 'a', 'b', 'c'],
+                        dtype=object)
+        data.setflags(write=writable)
+        exp_labels = np.array([0, 1, 0, 2, 1], dtype=np.intp)
+        exp_uniques = np.array(['a', 'c', 'b'], dtype=object)
+
+        labels, uniques = algos.factorize(data)
+        tm.assert_numpy_array_equal(labels, exp_labels)
+        tm.assert_numpy_array_equal(uniques, exp_uniques)
+
+    def test_object_factorize(self, writable):
+        data = np.array(['a', 'c', None, np.nan, 'a', 'b', pd.NaT, 'c'],
+                        dtype=object)
+        data.setflags(write=writable)
+        exp_labels = np.array([0, 1, -1, -1, 0, 2, -1, 1], dtype=np.intp)
+        exp_uniques = np.array(['a', 'c', 'b'], dtype=object)
 
         labels, uniques = algos.factorize(data)
         tm.assert_numpy_array_equal(labels, exp_labels)
@@ -1262,41 +1295,107 @@ def test_get_unique(self):
         exp = np.array([1, 2, 2**63], dtype=np.uint64)
         tm.assert_numpy_array_equal(s.unique(), exp)
 
-    def test_vector_resize(self, writable):
+    @pytest.mark.parametrize('nvals', [0, 10])  # resizing to 0 is special case
+    @pytest.mark.parametrize('htable, uniques, dtype, safely_resizes', [
+        (ht.PyObjectHashTable, ht.ObjectVector, 'object', False),
+        (ht.StringHashTable, ht.ObjectVector, 'object', True),
+        (ht.Float64HashTable, ht.Float64Vector, 'float64', False),
+        (ht.Int64HashTable, ht.Int64Vector, 'int64', False),
+        (ht.UInt64HashTable, ht.UInt64Vector, 'uint64', False)])
+    def test_vector_resize(self, writable, htable, uniques, dtype,
+                           safely_resizes, nvals):
         # Test for memory errors after internal vector
-        # reallocations (pull request #7157)
-
-        def _test_vector_resize(htable, uniques, dtype, nvals, safely_resizes):
-            vals = np.array(np.random.randn(1000), dtype=dtype)
-            # GH 21688 ensure we can deal with readonly memory views
-            vals.setflags(write=writable)
-            # get_labels may append to uniques
-            htable.get_labels(vals[:nvals], uniques, 0, -1)
-            # to_array() set an external_view_exists flag on uniques.
-            tmp = uniques.to_array()
-            oldshape = tmp.shape
-            # subsequent get_labels() calls can no longer append to it
-            # (for all but StringHashTables + ObjectVector)
-            if safely_resizes:
+        # reallocations (GH 7157)
+        vals = np.array(np.random.randn(1000), dtype=dtype)
+
+        # GH 21688 ensures we can deal with read-only memory views
+        vals.setflags(write=writable)
+
+        # initialise instances; cannot initialise in parametrization,
+        # as otherwise external views would be held on the array (which is
+        # one of the things this test is checking)
+        htable = htable()
+        uniques = uniques()
+
+        # get_labels may append to uniques
+        htable.get_labels(vals[:nvals], uniques, 0, -1)
+        # to_array() sets an external_view_exists flag on uniques.
+        tmp = uniques.to_array()
+        oldshape = tmp.shape
+
+        # subsequent get_labels() calls can no longer append to it
+        # (except for StringHashTables + ObjectVector)
+        if safely_resizes:
+            htable.get_labels(vals, uniques, 0, -1)
+        else:
+            with tm.assert_raises_regex(ValueError, 'external reference.*'):
                 htable.get_labels(vals, uniques, 0, -1)
-            else:
-                with pytest.raises(ValueError) as excinfo:
-                    htable.get_labels(vals, uniques, 0, -1)
-                assert str(excinfo.value).startswith('external reference')
-            uniques.to_array()  # should not raise here
-            assert tmp.shape == oldshape
-
-        test_cases = [
-            (ht.PyObjectHashTable, ht.ObjectVector, 'object', False),
-            (ht.StringHashTable, ht.ObjectVector, 'object', True),
-            (ht.Float64HashTable, ht.Float64Vector, 'float64', False),
-            (ht.Int64HashTable, ht.Int64Vector, 'int64', False),
-            (ht.UInt64HashTable, ht.UInt64Vector, 'uint64', False)]
-
-        for (tbl, vect, dtype, safely_resizes) in test_cases:
-            # resizing to empty is a special case
-            _test_vector_resize(tbl(), vect(), dtype, 0, safely_resizes)
-            _test_vector_resize(tbl(), vect(), dtype, 10, safely_resizes)
+
+        uniques.to_array()  # should not raise here
+        assert tmp.shape == oldshape
+
+    @pytest.mark.parametrize('htable, tm_dtype', [
+        (ht.PyObjectHashTable, 'String'),
+        (ht.StringHashTable, 'String'),
+        (ht.Float64HashTable, 'Float'),
+        (ht.Int64HashTable, 'Int'),
+        (ht.UInt64HashTable, 'UInt')])
+    def test_hashtable_unique(self, htable, tm_dtype, writable):
+        # output of maker has guaranteed unique elements
+        maker = getattr(tm, 'make' + tm_dtype + 'Index')
+        s = Series(maker(1000))
+        if htable == ht.Float64HashTable:
+            # add NaN for float column
+            s.loc[500] = np.nan
+        elif htable == ht.PyObjectHashTable:
+            # use different NaN types for object column
+            s.loc[500:502] = [np.nan, None, pd.NaT]
+
+        # create duplicated selection
+        s_duplicated = s.sample(frac=3, replace=True).reset_index(drop=True)
+        s_duplicated.values.setflags(write=writable)
+
+        # drop_duplicates has own cython code (hash_table_func_helper.pxi)
+        # and is tested separately; keeps first occurrence like ht.unique()
+        expected_unique = s_duplicated.drop_duplicates(keep='first').values
+        result_unique = htable().unique(s_duplicated.values)
+        tm.assert_numpy_array_equal(result_unique, expected_unique)
+
+    @pytest.mark.parametrize('htable, tm_dtype', [
+        (ht.PyObjectHashTable, 'String'),
+        (ht.StringHashTable, 'String'),
+        (ht.Float64HashTable, 'Float'),
+        (ht.Int64HashTable, 'Int'),
+        (ht.UInt64HashTable, 'UInt')])
+    def test_hashtable_factorize(self, htable, tm_dtype, writable):
+        # output of maker has guaranteed unique elements
+        maker = getattr(tm, 'make' + tm_dtype + 'Index')
+        s = Series(maker(1000))
+        if htable == ht.Float64HashTable:
+            # add NaN for float column
+            s.loc[500] = np.nan
+        elif htable == ht.PyObjectHashTable:
+            # use different NaN types for object column
+            s.loc[500:502] = [np.nan, None, pd.NaT]
+
+        # create duplicated selection
+        s_duplicated = s.sample(frac=3, replace=True).reset_index(drop=True)
+        s_duplicated.values.setflags(write=writable)
+        na_mask = s_duplicated.isna().values
+
+        result_inverse, result_unique = htable().factorize(s_duplicated.values)
+
+        # drop_duplicates has own cython code (hash_table_func_helper.pxi)
+        # and is tested separately; keeps first occurrence like ht.factorize()
+        # since factorize removes all NaNs, we do the same here
+        expected_unique = s_duplicated.dropna().drop_duplicates().values
+        tm.assert_numpy_array_equal(result_unique, expected_unique)
+
+        # reconstruction can only succeed if the inverse is correct. Since
+        # factorize removes the NaNs, those have to be excluded here as well
+        result_reconstruct = result_unique[result_inverse[~na_mask]]
+        expected_reconstruct = s_duplicated.dropna().values
+        tm.assert_numpy_array_equal(result_reconstruct, expected_reconstruct)
 
 
 def test_quantile():
@@ -1311,14 +1410,14 @@ def test_unique_label_indices():
 
     a = np.random.randint(1, 1 << 10, 1 << 15).astype('i8')
 
-    left = unique_label_indices(a)
+    left = ht.unique_label_indices(a)
     right = np.unique(a, return_index=True)[1]
     tm.assert_numpy_array_equal(left, right,
                                 check_dtype=False)
 
    a[np.random.choice(len(a), 10)] = -1
 
-    left = unique_label_indices(a)
+    left = ht.unique_label_indices(a)
     right = np.unique(a, return_index=True)[1][1:]
     tm.assert_numpy_array_equal(left, right,
                                 check_dtype=False)
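
For illustration (not part of the patch): after this change the hashtable factorize methods return (labels, uniques) in that order, accept na_sentinel/na_value keywords, and keep get_labels as a thin wrapper around the shared _factorize implementation, which is what _factorize_array in pandas/core/algorithms.py now relies on. A minimal usage sketch, assuming a pandas build with this patch applied:

    import numpy as np
    import pandas._libs.hashtable as ht

    values = np.array([1.0, 1e8, np.nan, 1.0], dtype=np.float64)

    # labels come first, uniques second (matching pandas.factorize);
    # NaN is mapped to na_sentinel (-1 by default) and excluded from uniques
    labels, uniques = ht.Float64HashTable().factorize(values)
    # labels:  [ 0  1 -1  0]
    # uniques: [1.e+00 1.e+08]

    # na_value marks an additional value as missing, here treating 1.0
    # as a missing-value marker on top of NaN
    labels2, uniques2 = ht.Float64HashTable().factorize(values, na_value=1.0)
    # labels2:  [-1  0 -1 -1]
    # uniques2: [1.e+08]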