diff --git a/modules/rgbd/src/tsdf.cpp b/modules/rgbd/src/tsdf.cpp index 3fd3b106457..c60a7ccc915 100644 --- a/modules/rgbd/src/tsdf.cpp +++ b/modules/rgbd/src/tsdf.cpp @@ -1393,7 +1393,7 @@ void TSDFVolumeGPU::fetchPointsNormals(OutputArray points, OutputArray normals) (int)divUp(globalSize[2], (unsigned int)localSize[2])); const size_t counterSize = sizeof(int); - size_t lsz = localSize[0]*localSize[1]*localSize[2]*counterSize; + size_t lszscan = localSize[0]*localSize[1]*localSize[2]*counterSize; const int gsz[3] = {ngroups[2], ngroups[1], ngroups[0]}; UMat groupedSum(3, gsz, CV_32S, Scalar(0)); @@ -1409,7 +1409,7 @@ void TSDFVolumeGPU::fetchPointsNormals(OutputArray points, OutputArray normals) ocl::KernelArg::PtrReadOnly(volPoseGpu), voxelSize, voxelSizeInv, - ocl::KernelArg::Local(lsz), + ocl::KernelArg::Local(lszscan), ocl::KernelArg::WriteOnlyNoSize(groupedSum)); if(!kscan.run(3, globalSize, localSize, true)) @@ -1422,12 +1422,6 @@ void TSDFVolumeGPU::fetchPointsNormals(OutputArray points, OutputArray normals) // 2. fill output arrays according to per-group points count - ocl::Kernel kfill; - kfill.create("fillPtsNrm", source, options, &errorStr); - - if(kfill.empty()) - throw std::runtime_error("Failed to create kernel: " + errorStr); - points.create(gpuSum, 1, POINT_TYPE); UMat pts = points.getUMat(); UMat nrm; @@ -1438,31 +1432,41 @@ void TSDFVolumeGPU::fetchPointsNormals(OutputArray points, OutputArray normals) } else { - // it won't access but empty args are forbidden + // it won't be accessed but empty args are forbidden nrm = UMat(1, 1, POINT_TYPE); } - UMat atomicCtr(1, 1, CV_32S, Scalar(0)); - - // mem size to keep pts (and normals optionally) for all work-items in a group - lsz = localSize[0]*localSize[1]*localSize[2]*elemSize; - kfill.args(ocl::KernelArg::PtrReadOnly(volume), - volResGpu.val, - volDims.val, - neighbourCoords.val, - ocl::KernelArg::PtrReadOnly(volPoseGpu), - voxelSize, - voxelSizeInv, - ((int)needNormals), - ocl::KernelArg::Local(lsz), - ocl::KernelArg::PtrReadWrite(atomicCtr), - ocl::KernelArg::ReadOnlyNoSize(groupedSum), - ocl::KernelArg::WriteOnlyNoSize(pts), - ocl::KernelArg::WriteOnlyNoSize(nrm) - ); - - if(!kfill.run(3, globalSize, localSize, true)) - throw std::runtime_error("Failed to run kernel"); + if (gpuSum) + { + ocl::Kernel kfill; + kfill.create("fillPtsNrm", source, options, &errorStr); + + if(kfill.empty()) + throw std::runtime_error("Failed to create kernel: " + errorStr); + + UMat atomicCtr(1, 1, CV_32S, Scalar(0)); + + // mem size to keep pts (and normals optionally) for all work-items in a group + size_t lszfill = localSize[0]*localSize[1]*localSize[2]*elemSize; + + kfill.args(ocl::KernelArg::PtrReadOnly(volume), + volResGpu.val, + volDims.val, + neighbourCoords.val, + ocl::KernelArg::PtrReadOnly(volPoseGpu), + voxelSize, + voxelSizeInv, + ((int)needNormals), + ocl::KernelArg::Local(lszfill), + ocl::KernelArg::PtrReadWrite(atomicCtr), + ocl::KernelArg::ReadOnlyNoSize(groupedSum), + ocl::KernelArg::WriteOnlyNoSize(pts), + ocl::KernelArg::WriteOnlyNoSize(nrm) + ); + + if(!kfill.run(3, globalSize, localSize, true)) + throw std::runtime_error("Failed to run kernel"); + } } }