Skip to content

Commit 825cd31

Browse files
committed
Add "deferred" argument
Signed-off-by: Vadim Markovtsev <[email protected]>
1 parent 77e056e commit 825cd31

File tree

6 files changed

+65
-21
lines changed

6 files changed

+65
-21
lines changed

README.md

+10-2
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ Python API
9090
Import "libMHCUDA".
9191

9292
```python
93-
def minhash_cuda_init(dim, samples, seed=time(), devices=0, verbosity=0)
93+
def minhash_cuda_init(dim, samples, seed=time(), deferred=False, devices=0, verbosity=0)
9494
```
9595
Creates the hasher.
9696

@@ -103,6 +103,10 @@ Creates the hasher.
103103

104104
**seed** integer, the random generator seed for reproducible results.
105105

106+
**deferred** boolean, if True, disables the initialization of WMH parameters with
107+
random numbers. In that case, the user is expected to call
108+
minhash_cuda_assign_vars() afterwards.
109+
106110
**devices** integer, bitwise OR-ed CUDA device indices, e.g. 1 means first device, 2 means second device,
107111
3 means using first and second device. Special value 0 enables all available devices.
108112
Default value is 0.
@@ -143,7 +147,7 @@ Include "minhashcuda.h".
143147

144148
```C
145149
MinhashCudaGenerator* mhcuda_init(
146-
uint32_t dim, uint16_t samples, uint32_t seed,
150+
uint32_t dim, uint16_t samples, uint32_t seed, int deferred,
147151
uint32_t devices, int verbosity, MHCUDAResult *status)
148152
```
149153
Initializes the Weighted MinHash generator.
@@ -156,6 +160,10 @@ Initializes the Weighted MinHash generator.
156160
157161
**seed** the random generator seed for reproducible results.
158162
163+
**deferred** if set to anything except 0, disables the initialization of WMH parameters with
164+
random numbers. In that case, the user is expected to call
165+
mhcuda_assign_random_vars() afterwards.
166+
159167
**devices** bitwise OR-ed CUDA device indices, e.g. 1 means first device, 2 means second device,
160168
3 means using first and second device. Special value 0 enables all available devices.
161169

minhashcuda.cc

+16-12
Original file line numberDiff line numberDiff line change
@@ -159,12 +159,23 @@ class CurandGenerator : public unique_devptr_parent<curandGenerator_st> {
159159
};
160160

161161
static MHCUDAResult mhcuda_init_internal(
162-
MinhashCudaGenerator *gen, uint32_t seed, const std::vector<int>& devs) {
162+
MinhashCudaGenerator *gen, uint32_t seed, bool deferred,
163+
const std::vector<int>& devs) {
163164
int verbosity = gen->verbosity;
164165
size_t const_size = gen->dim * gen->samples;
165166
CUMALLOC(gen->rs, const_size);
166167
CUMALLOC(gen->ln_cs, const_size);
167168
CUMALLOC(gen->betas, const_size);
169+
FOR_EACH_DEV(
170+
cudaDeviceProp props;
171+
CUCH(cudaGetDeviceProperties(&props, dev), mhcudaRuntimeError);
172+
gen->shmem_sizes.push_back(props.sharedMemPerBlock);
173+
DEBUG("GPU #%" PRIu32 " has %d bytes of shared memory per block\n",
174+
dev, gen->shmem_sizes.back());
175+
);
176+
if (deferred) {
177+
return mhcudaSuccess;
178+
}
168179
CUCH(cudaSetDevice(devs.back()), mhcudaNoSuchDevice);
169180
curandGenerator_t rndgen_;
170181
CURANDCH(curandCreateGenerator(&rndgen_, CURAND_RNG_PSEUDO_DEFAULT),
@@ -193,23 +204,16 @@ static MHCUDAResult mhcuda_init_internal(
193204
CUP2P(&gen->ln_cs, 0, const_size);
194205
CUP2P(&gen->betas, 0, const_size);
195206
);
196-
FOR_EACH_DEV(
197-
cudaDeviceProp props;
198-
CUCH(cudaGetDeviceProperties(&props, dev), mhcudaRuntimeError);
199-
gen->shmem_sizes.push_back(props.sharedMemPerBlock);
200-
DEBUG("GPU #%" PRIu32 " has %d bytes of shared memory per block\n",
201-
dev, gen->shmem_sizes.back());
202-
);
203207
return mhcudaSuccess;
204208
}
205209

206210
extern "C" {
207211

208212
MinhashCudaGenerator *mhcuda_init(
209-
uint32_t dim, uint16_t samples, uint32_t seed,
213+
uint32_t dim, uint16_t samples, uint32_t seed, int deferred,
210214
uint32_t devices, int verbosity, MHCUDAResult *status) {
211-
DEBUG("mhcuda_init: %" PRIu32 " %" PRIu16 " %" PRIu32 " %" PRIu32
212-
" %d %p\n", dim, samples, seed, devices, verbosity, status);
215+
DEBUG("mhcuda_init: %" PRIu32 " %" PRIu16 " %" PRIu32 " %d %" PRIu32
216+
" %d %p\n", dim, samples, seed, deferred, devices, verbosity, status);
213217
if (dim == 0 || samples == 0) {
214218
if (status) *status = mhcudaInvalidArguments;
215219
return nullptr;
@@ -228,7 +232,7 @@ MinhashCudaGenerator *mhcuda_init(
228232
return nullptr; \
229233
} \
230234
} while(false)
231-
CHECK_SUCCESS(mhcuda_init_internal(gen.get(), seed, devs));
235+
CHECK_SUCCESS(mhcuda_init_internal(gen.get(), seed, deferred, devs));
232236
if (verbosity > 1) {
233237
CHECK_SUCCESS(print_memory_stats(devs));
234238
}

minhashcuda.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -63,14 +63,16 @@ enum MHCUDAResult {
6363
/// but the larger the hash size and the longer to calculate (linear). Must not be prime
6464
/// for performance considerations.
6565
/// @param seed The random generator seed for reproducible results.
66+
/// @param deferred Do not initialize the generator. Instead, expect the user to
67+
/// call mhcuda_assign_random_vars() afterwards.
6668
/// @param devices Bitwise OR-ed CUDA device indices, e.g. 1 means first device, 2 means second device,
6769
/// 3 means using first and second device. Special value 0 enables all available devices.
6870
/// @param verbosity 0 means complete silence, 1 means mere progress logging, 2 means lots of output.
6971
/// @param status The pointer to the reported return code. May be nullptr. In case of any error, the
7072
/// returned result is nullptr and the code is stored into *status (with nullptr check).
7173
/// @return The pointer to the allocated generator opaque struct.
7274
MinhashCudaGenerator* mhcuda_init(
73-
uint32_t dim, uint16_t samples, uint32_t seed,
75+
uint32_t dim, uint16_t samples, uint32_t seed, int deferred,
7476
uint32_t devices, int verbosity, MHCUDAResult *status) MALLOC;
7577

7678
/// @brief Extracts the parameters for the specified Weighted MinHash generator.

python.cc

+5-4
Original file line numberDiff line numberDiff line change
@@ -98,21 +98,22 @@ static PyObject *py_minhash_cuda_init(PyObject *self, PyObject *args,
9898
PyObject *kwargs) {
9999
uint32_t dim, seed = static_cast<uint32_t>(time(NULL)), devices = 0;
100100
uint16_t samples;
101+
int deferred = false;
101102
int verbosity = 0;
102103
static const char *kwlist[] = {
103-
"dim", "samples", "seed", "devices", "verbosity", NULL
104+
"dim", "samples", "seed", "deferred", "devices", "verbosity", NULL
104105
};
105106

106107
/* Parse the input tuple */
107108
if (!PyArg_ParseTupleAndKeywords(
108-
args, kwargs, "IH|IIi", const_cast<char**>(kwlist), &dim, &samples,
109-
&seed, &devices, &verbosity)) {
109+
args, kwargs, "IH|IpIi", const_cast<char**>(kwlist), &dim, &samples,
110+
&seed, &deferred, &devices, &verbosity)) {
110111
return NULL;
111112
}
112113
MHCUDAResult result = mhcudaSuccess;
113114
MinhashCudaGenerator *gen;
114115
Py_BEGIN_ALLOW_THREADS
115-
gen = mhcuda_init(dim, samples, seed, devices, verbosity, &result);
116+
gen = mhcuda_init(dim, samples, seed, deferred, devices, verbosity, &result);
116117
Py_END_ALLOW_THREADS
117118
switch (result) {
118119
case mhcudaInvalidArguments:

setup.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def is_pure(self):
4646
setup(
4747
name="libMHCUDA",
4848
description="Accelerated Weighted MinHash-ing on GPU",
49-
version="1.1.5",
49+
version="2.0.0",
5050
license="MIT",
5151
author="Vadim Markovtsev",
5252
author_email="[email protected]",
@@ -57,7 +57,7 @@ def is_pure(self):
5757
distclass=BinaryDistribution,
5858
cmdclass={'build_py': CMakeBuild},
5959
classifiers=[
60-
"Development Status :: 4 - Beta",
60+
"Development Status :: 5 - Production/Stable",
6161
"Intended Audience :: Developers",
6262
"License :: OSI Approved :: MIT License",
6363
"Operating System :: POSIX :: Linux",

test.py

+29
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,35 @@ def test_backwards(self):
151151
print(hashes)
152152
raise e from None
153153

154+
def test_deferred(self):
155+
v1 = [1, 0, 0, 0, 3, 4, 5, 0, 0, 0, 0, 6, 7, 8, 0, 0, 0, 0, 0, 0, 9, 10, 4]
156+
v2 = [2, 0, 0, 0, 4, 3, 8, 0, 0, 0, 0, 4, 7, 10, 0, 0, 0, 0, 0, 0, 9, 0, 0]
157+
gen = libMHCUDA.minhash_cuda_init(len(v1), 128, devices=1, verbosity=2)
158+
vars = libMHCUDA.minhash_cuda_retrieve_vars(gen)
159+
libMHCUDA.minhash_cuda_fini(gen)
160+
gen = libMHCUDA.minhash_cuda_init(
161+
len(v1), 128, devices=1, deferred=True, verbosity=2)
162+
libMHCUDA.minhash_cuda_assign_vars(gen, *vars)
163+
bgen = WeightedMinHashGenerator.__new__(WeightedMinHashGenerator)
164+
bgen.dim = len(v1)
165+
bgen.rs, bgen.ln_cs, bgen.betas = vars
166+
bgen.sample_size = 128
167+
bgen.seed = None
168+
m = csr_matrix(numpy.array([v1, v2], dtype=numpy.float32))
169+
hashes = libMHCUDA.minhash_cuda_calc(gen, m)
170+
libMHCUDA.minhash_cuda_fini(gen)
171+
self.assertEqual(hashes.shape, (2, 128, 2))
172+
true_hashes = numpy.array([bgen.minhash(v1).hashvalues,
173+
bgen.minhash(v2).hashvalues], dtype=numpy.uint32)
174+
self.assertEqual(true_hashes.shape, (2, 128, 2))
175+
try:
176+
self.assertTrue((hashes == true_hashes).all())
177+
except AssertionError as e:
178+
print("---- TRUE ----")
179+
print(true_hashes)
180+
print("---- FALSE ----")
181+
print(hashes)
182+
raise e from None
154183

155184
if __name__ == "__main__":
156185
unittest.main()

0 commit comments

Comments (0)