Skip to content

Commit 825cd31

Browse files
committed
Add "deferred" argument
Signed-off-by: Vadim Markovtsev <[email protected]>
1 parent 77e056e commit 825cd31

File tree

6 files changed

+65
-21
lines changed

6 files changed

+65
-21
lines changed

README.md

+10-2
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ Python API
9090
Import "libMHCUDA".
9191

9292
```python
93-
def minhash_cuda_init(dim, samples, seed=time(), devices=0, verbosity=0)
93+
def minhash_cuda_init(dim, samples, seed=time(), deferred=False, devices=0, verbosity=0)
9494
```
9595
Creates the hasher.
9696

@@ -103,6 +103,10 @@ Creates the hasher.
103103

104104
**seed** integer, the random generator seed for reproducible results.
105105

106+
**deferred** boolean, if True, disables the initialization of WMH parameters with
107+
random numbers. In that case, the user is expected to call
108+
minhash_cuda_assign_vars() afterwards.
109+
106110
**devices** integer, bitwise OR-ed CUDA device indices, e.g. 1 means first device, 2 means second device,
107111
3 means using first and second device. Special value 0 enables all available devices.
108112
Default value is 0.
@@ -143,7 +147,7 @@ Include "minhashcuda.h".
143147

144148
```C
145149
MinhashCudaGenerator* mhcuda_init(
146-
uint32_t dim, uint16_t samples, uint32_t seed,
150+
uint32_t dim, uint16_t samples, uint32_t seed, int deferred,
147151
uint32_t devices, int verbosity, MHCUDAResult *status)
148152
```
149153
Initializes the Weighted MinHash generator.
@@ -156,6 +160,10 @@ Initializes the Weighted MinHash generator.
156160
157161
**seed** the random generator seed for reproducible results.
158162
163+
**deferred** if set to anything except 0, disables the initialization of WMH parameters with
164+
random numbers. In that case, the user is expected to call
165+
mhcuda_assign_random_vars() afterwards.
166+
159167
**devices** bitwise OR-ed CUDA device indices, e.g. 1 means first device, 2 means second device,
160168
3 means using first and second device. Special value 0 enables all available devices.
161169

minhashcuda.cc

+16-12
Original file line numberDiff line numberDiff line change
@@ -159,12 +159,23 @@ class CurandGenerator : public unique_devptr_parent<curandGenerator_st> {
159159
};
160160

161161
static MHCUDAResult mhcuda_init_internal(
162-
MinhashCudaGenerator *gen, uint32_t seed, const std::vector<int>& devs) {
162+
MinhashCudaGenerator *gen, uint32_t seed, bool deferred,
163+
const std::vector<int>& devs) {
163164
int verbosity = gen->verbosity;
164165
size_t const_size = gen->dim * gen->samples;
165166
CUMALLOC(gen->rs, const_size);
166167
CUMALLOC(gen->ln_cs, const_size);
167168
CUMALLOC(gen->betas, const_size);
169+
FOR_EACH_DEV(
170+
cudaDeviceProp props;
171+
CUCH(cudaGetDeviceProperties(&props, dev), mhcudaRuntimeError);
172+
gen->shmem_sizes.push_back(props.sharedMemPerBlock);
173+
DEBUG("GPU #%" PRIu32 " has %d bytes of shared memory per block\n",
174+
dev, gen->shmem_sizes.back());
175+
);
176+
if (deferred) {
177+
return mhcudaSuccess;
178+
}
168179
CUCH(cudaSetDevice(devs.back()), mhcudaNoSuchDevice);
169180
curandGenerator_t rndgen_;
170181
CURANDCH(curandCreateGenerator(&rndgen_, CURAND_RNG_PSEUDO_DEFAULT),
@@ -193,23 +204,16 @@ static MHCUDAResult mhcuda_init_internal(
193204
CUP2P(&gen->ln_cs, 0, const_size);
194205
CUP2P(&gen->betas, 0, const_size);
195206
);
196-
FOR_EACH_DEV(
197-
cudaDeviceProp props;
198-
CUCH(cudaGetDeviceProperties(&props, dev), mhcudaRuntimeError);
199-
gen->shmem_sizes.push_back(props.sharedMemPerBlock);
200-
DEBUG("GPU #%" PRIu32 " has %d bytes of shared memory per block\n",
201-
dev, gen->shmem_sizes.back());
202-
);
203207
return mhcudaSuccess;
204208
}
205209

206210
extern "C" {
207211

208212
MinhashCudaGenerator *mhcuda_init(
209-
uint32_t dim, uint16_t samples, uint32_t seed,
213+
uint32_t dim, uint16_t samples, uint32_t seed, int deferred,
210214
uint32_t devices, int verbosity, MHCUDAResult *status) {
211-
DEBUG("mhcuda_init: %" PRIu32 " %" PRIu16 " %" PRIu32 " %" PRIu32
212-
" %d %p\n", dim, samples, seed, devices, verbosity, status);
215+
DEBUG("mhcuda_init: %" PRIu32 " %" PRIu16 " %" PRIu32 " %d %" PRIu32
216+
" %d %p\n", dim, samples, seed, deferred, devices, verbosity, status);
213217
if (dim == 0 || samples == 0) {
214218
if (status) *status = mhcudaInvalidArguments;
215219
return nullptr;
@@ -228,7 +232,7 @@ MinhashCudaGenerator *mhcuda_init(
228232
return nullptr; \
229233
} \
230234
} while(false)
231-
CHECK_SUCCESS(mhcuda_init_internal(gen.get(), seed, devs));
235+
CHECK_SUCCESS(mhcuda_init_internal(gen.get(), seed, deferred, devs));
232236
if (verbosity > 1) {
233237
CHECK_SUCCESS(print_memory_stats(devs));
234238
}

minhashcuda.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -63,14 +63,16 @@ enum MHCUDAResult {
6363
/// but the larger the hash size and the longer to calculate (linear). Must not be prime
6464
/// for performance considerations.
6565
/// @param seed The random generator seed for reproducible results.
66+
/// @param deferred Do not initialize the generator. Instead, expect the user to
67+
/// call mhcuda_assign_random_vars() afterwards.
6668
/// @param devices Bitwise OR-ed CUDA device indices, e.g. 1 means first device, 2 means second device,
6769
/// 3 means using first and second device. Special value 0 enables all available devices.
6870
/// @param verbosity 0 means complete silence, 1 means mere progress logging, 2 means lots of output.
6971
/// @param status The pointer to the reported return code. May be nullptr. In case of any error, the
7072
/// returned result is nullptr and the code is stored into *status (with nullptr check).
7173
/// @return The pointer to the allocated generator opaque struct.
7274
MinhashCudaGenerator* mhcuda_init(
73-
uint32_t dim, uint16_t samples, uint32_t seed,
75+
uint32_t dim, uint16_t samples, uint32_t seed, int deferred,
7476
uint32_t devices, int verbosity, MHCUDAResult *status) MALLOC;
7577

7678
/// @brief Extracts the parameters for the specified Weighted MinHash generator.

python.cc

+5-4
Original file line numberDiff line numberDiff line change
@@ -98,21 +98,22 @@ static PyObject *py_minhash_cuda_init(PyObject *self, PyObject *args,
9898
PyObject *kwargs) {
9999
uint32_t dim, seed = static_cast<uint32_t>(time(NULL)), devices = 0;
100100
uint16_t samples;
101+
int deferred = false;
101102
int verbosity = 0;
102103
static const char *kwlist[] = {
103-
"dim", "samples", "seed", "devices", "verbosity", NULL
104+
"dim", "samples", "seed", "deferred", "devices", "verbosity", NULL
104105
};
105106

106107
/* Parse the input tuple */
107108
if (!PyArg_ParseTupleAndKeywords(
108-
args, kwargs, "IH|IIi", const_cast<char**>(kwlist), &dim, &samples,
109-
&seed, &devices, &verbosity)) {
109+
args, kwargs, "IH|IpIi", const_cast<char**>(kwlist), &dim, &samples,
110+
&seed, &deferred, &devices, &verbosity)) {
110111
return NULL;
111112
}
112113
MHCUDAResult result = mhcudaSuccess;
113114
MinhashCudaGenerator *gen;
114115
Py_BEGIN_ALLOW_THREADS
115-
gen = mhcuda_init(dim, samples, seed, devices, verbosity, &result);
116+
gen = mhcuda_init(dim, samples, seed, deferred, devices, verbosity, &result);
116117
Py_END_ALLOW_THREADS
117118
switch (result) {
118119
case mhcudaInvalidArguments:

setup.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def is_pure(self):
4646
setup(
4747
name="libMHCUDA",
4848
description="Accelerated Weighted MinHash-ing on GPU",
49-
version="1.1.5",
49+
version="2.0.0",
5050
license="MIT",
5151
author="Vadim Markovtsev",
5252
author_email="[email protected]",
@@ -57,7 +57,7 @@ def is_pure(self):
5757
distclass=BinaryDistribution,
5858
cmdclass={'build_py': CMakeBuild},
5959
classifiers=[
60-
"Development Status :: 4 - Beta",
60+
"Development Status :: 5 - Production/Stable",
6161
"Intended Audience :: Developers",
6262
"License :: OSI Approved :: MIT License",
6363
"Operating System :: POSIX :: Linux",

test.py

+29
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,35 @@ def test_backwards(self):
151151
print(hashes)
152152
raise e from None
153153

154+
def test_deferred(self):
155+
v1 = [1, 0, 0, 0, 3, 4, 5, 0, 0, 0, 0, 6, 7, 8, 0, 0, 0, 0, 0, 0, 9, 10, 4]
156+
v2 = [2, 0, 0, 0, 4, 3, 8, 0, 0, 0, 0, 4, 7, 10, 0, 0, 0, 0, 0, 0, 9, 0, 0]
157+
gen = libMHCUDA.minhash_cuda_init(len(v1), 128, devices=1, verbosity=2)
158+
vars = libMHCUDA.minhash_cuda_retrieve_vars(gen)
159+
libMHCUDA.minhash_cuda_fini(gen)
160+
gen = libMHCUDA.minhash_cuda_init(
161+
len(v1), 128, devices=1, deferred=True, verbosity=2)
162+
libMHCUDA.minhash_cuda_assign_vars(gen, *vars)
163+
bgen = WeightedMinHashGenerator.__new__(WeightedMinHashGenerator)
164+
bgen.dim = len(v1)
165+
bgen.rs, bgen.ln_cs, bgen.betas = vars
166+
bgen.sample_size = 128
167+
bgen.seed = None
168+
m = csr_matrix(numpy.array([v1, v2], dtype=numpy.float32))
169+
hashes = libMHCUDA.minhash_cuda_calc(gen, m)
170+
libMHCUDA.minhash_cuda_fini(gen)
171+
self.assertEqual(hashes.shape, (2, 128, 2))
172+
true_hashes = numpy.array([bgen.minhash(v1).hashvalues,
173+
bgen.minhash(v2).hashvalues], dtype=numpy.uint32)
174+
self.assertEqual(true_hashes.shape, (2, 128, 2))
175+
try:
176+
self.assertTrue((hashes == true_hashes).all())
177+
except AssertionError as e:
178+
print("---- TRUE ----")
179+
print(true_hashes)
180+
print("---- FALSE ----")
181+
print(hashes)
182+
raise e from None
154183

155184
if __name__ == "__main__":
156185
unittest.main()

0 commit comments

Comments (0)