Skip to content

Commit 4bae74e

Browse files
committed
Merge branch 'develop'
2 parents 68d52cb + b5a9afc commit 4bae74e

File tree

5 files changed

+34
-25
lines changed

5 files changed

+34
-25
lines changed

CMakeLists.txt

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,22 +10,26 @@ if (PROFILE OR CMAKE_BUILD_TYPE STREQUAL "Debug")
1010
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DPROFILE")
1111
endif()
1212
#set(CMAKE_VERBOSE_MAKEFILE on)
13-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -Wall -Werror -std=c++11 ${OpenMP_CXX_FLAGS}")
13+
if (NOT DEFINED CUDA_ARCH)
14+
set(CUDA_ARCH "61")
15+
endif()
16+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -Wall -Werror -DCUDA_ARCH=${CUDA_ARCH} -std=c++11 ${OpenMP_CXX_FLAGS}")
1417
set(SOURCE_FILES minhashcuda.cc minhashcuda.h wrappers.h private.h kernel.cu)
1518
if (NOT DISABLE_PYTHON)
1619
list(APPEND SOURCE_FILES python.cc)
1720
endif()
1821
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
1922
set(NVCC_FLAGS "-G -g")
2023
endif()
21-
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -arch=compute_52 -code=sm_52 -Xptxas=-v -D_MWAITXINTRIN_H_INCLUDED -D_FORCE_INLINES")
24+
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -arch=compute_${CUDA_ARCH} -Xptxas=-v -D_MWAITXINTRIN_H_INCLUDED -D_FORCE_INLINES")
2225
if (CMAKE_MAJOR_VERSION LESS 4 AND CMAKE_MINOR_VERSION LESS 3)
2326
# workaround https://github.com/Kitware/CMake/commit/99abebdea01b9ef73e091db5594553f7b1694a1b
2427
message(STATUS "Applied CUDA C++11 workaround on CMake < 3.3")
2528
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --std c++11")
2629
endif()
2730
cuda_add_library(MHCUDA SHARED ${SOURCE_FILES} OPTIONS ${NVCC_FLAGS})
31+
target_link_libraries(MHCUDA ${CUDA_curand_LIBRARY})
2832
if(PYTHONLIBS_FOUND)
2933
include_directories(${PYTHON_INCLUDE_DIRS} ${NUMPY_INCLUDES})
30-
target_link_libraries(MHCUDA ${PYTHON_LIBRARIES} ${CUDA_curand_LIBRARY})
31-
endif()
34+
target_link_libraries(MHCUDA ${PYTHON_LIBRARIES})
35+
endif()

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
[![Build Status](https://travis-ci.org/src-d/minhashcuda.svg?branch=master)](https://travis-ci.org/src-d/minhashcuda) [![PyPI](https://img.shields.io/pypi/v/libMHCUDA.svg)](https://pypi.python.org/pypi/libMHCUDA)
1+
[![Build Status](https://travis-ci.org/src-d/minhashcuda.svg?branch=master)](https://travis-ci.org/src-d/minhashcuda) [![PyPI](https://img.shields.io/pypi/v/libMHCUDA.svg)](https://pypi.python.org/pypi/libMHCUDA) [![10.5281/zenodo.286955](https://zenodo.org/badge/DOI/10.5281/zenodo.286955.svg)](https://doi.org/10.5281/zenodo.286955)
22

33
MinHashCuda
44
===========

minhashcuda.cc

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -40,24 +40,24 @@ struct MinhashCudaGenerator_ {
4040
} // extern "C"
4141

4242

43-
static std::vector<int> setup_devices(uint32_t device, int verbosity) {
43+
static std::vector<int> setup_devices(uint32_t devices, int verbosity) {
4444
std::vector<int> devs;
45-
if (device == 0) {
46-
cudaGetDeviceCount(reinterpret_cast<int *>(&device));
47-
if (device == 0) {
45+
if (devices == 0) {
46+
cudaGetDeviceCount(reinterpret_cast<int *>(&devices));
47+
if (devices == 0) {
4848
return std::move(devs);
4949
}
50-
device = (1u << device) - 1;
50+
devices = (1u << devices) - 1;
5151
}
52-
for (int dev = 0; device; dev++) {
53-
if (device & 1) {
52+
for (int dev = 0; devices; dev++) {
53+
if (devices & 1) {
5454
devs.push_back(dev);
5555
if (cudaSetDevice(dev) != cudaSuccess) {
5656
INFO("failed to validate device %d", dev);
5757
devs.pop_back();
5858
}
5959
}
60-
device >>= 1;
60+
devices >>= 1;
6161
}
6262
if (devs.size() > 1) {
6363
for (int dev1 : devs) {
@@ -268,10 +268,11 @@ MHCUDAResult mhcuda_assign_random_vars(
268268
} // extern "C"
269269

270270
static std::vector<uint32_t> calc_best_split(
271-
const MinhashCudaGenerator *gen, const uint32_t *rows, uint32_t length) {
272-
uint32_t ideal_split = rows[length] / gen->devs.size();
271+
const uint32_t *rows, uint32_t length, const std::vector<int> &devs,
272+
const std::vector<uint32_t> &sizes) {
273+
uint32_t ideal_split = rows[length] / devs.size();
273274
std::vector<std::vector<uint32_t>> variants;
274-
for (size_t devi = 0; devi < gen->devs.size(); devi++) {
275+
for (size_t devi = 0; devi < devs.size(); devi++) {
275276
uint32_t row = std::upper_bound(
276277
rows, rows + length + 1, ideal_split * (devi + 1)) - rows;
277278
std::vector<std::vector<uint32_t>> fork;
@@ -301,9 +302,9 @@ static std::vector<uint32_t> calc_best_split(
301302
uint32_t min_cost = 0xFFFFFFFFu;
302303
for (auto &v : variants) {
303304
uint32_t cost = 0;
304-
for (size_t i = 0; i < gen->devs.size(); i++) {
305+
for (size_t i = 0; i < devs.size(); i++) {
305306
uint32_t row = v[i], prev_row = (i > 0)? v[i - 1] : 0;
306-
uint32_t diff = rows[row] - rows[prev_row] - gen->sizes[i];
307+
uint32_t diff = rows[row] - rows[prev_row] - sizes[i];
307308
if (diff > 0) {
308309
cost += diff * diff;
309310
}
@@ -522,7 +523,7 @@ MHCUDAResult mhcuda_calc(
522523
rows, length, output);
523524
auto &devs = gen->devs;
524525
INFO("Preparing...\n");
525-
std::vector<uint32_t> split = calc_best_split(gen, rows, length);
526+
std::vector<uint32_t> split = calc_best_split(rows, length, gen->devs, gen->sizes);
526527
if (verbosity > 1) {
527528
dump_vector(split, "split");
528529
}

python.cc

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -68,15 +68,19 @@ template <typename O>
6868
using pyobj_parent = std::unique_ptr<O, std::function<void(O*)>>;
6969

7070
template <typename O>
71-
class pyobj : public pyobj_parent<O> {
71+
class _pyobj : public pyobj_parent<O> {
7272
public:
73-
pyobj() : pyobj_parent<O>(
74-
nullptr, [](PyObject *p){ if (p) Py_DECREF(p); }) {}
75-
explicit pyobj(PyObject *ptr) : pyobj_parent<O>(
73+
_pyobj() : pyobj_parent<O>(
74+
nullptr, [](O *p){ if (p) Py_DECREF(p); }) {}
75+
explicit _pyobj(PyObject *ptr) : pyobj_parent<O>(
7676
reinterpret_cast<O *>(ptr), [](O *p){ if(p) Py_DECREF(p); }) {}
77+
void reset(PyObject *p) noexcept {
78+
pyobj_parent<O>::reset(reinterpret_cast<O*>(p));
79+
}
7780
};
7881

79-
using pyarray = pyobj<PyArrayObject>;
82+
using pyobj = _pyobj<PyObject>;
83+
using pyarray = _pyobj<PyArrayObject>;
8084

8185
static void set_cuda_malloc_error() {
8286
PyErr_SetString(PyExc_MemoryError, "Failed to allocate memory on GPU");

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def is_pure(self):
4646
setup(
4747
name="libMHCUDA",
4848
description="Accelerated Weighted MinHash-ing on GPU",
49-
version="1.1.0",
49+
version="1.1.1",
5050
license="MIT",
5151
author="Vadim Markovtsev",
5252
author_email="[email protected]",

0 commit comments

Comments (0)