@@ -1393,7 +1393,7 @@ void TSDFVolumeGPU::fetchPointsNormals(OutputArray points, OutputArray normals)
1393
1393
(int )divUp (globalSize[2 ], (unsigned int )localSize[2 ]));
1394
1394
1395
1395
const size_t counterSize = sizeof (int );
1396
- size_t lsz = localSize[0 ]*localSize[1 ]*localSize[2 ]*counterSize;
1396
+ size_t lszscan = localSize[0 ]*localSize[1 ]*localSize[2 ]*counterSize;
1397
1397
1398
1398
const int gsz[3 ] = {ngroups[2 ], ngroups[1 ], ngroups[0 ]};
1399
1399
UMat groupedSum (3 , gsz, CV_32S, Scalar (0 ));
@@ -1409,7 +1409,7 @@ void TSDFVolumeGPU::fetchPointsNormals(OutputArray points, OutputArray normals)
1409
1409
ocl::KernelArg::PtrReadOnly (volPoseGpu),
1410
1410
voxelSize,
1411
1411
voxelSizeInv,
1412
- ocl::KernelArg::Local (lsz ),
1412
+ ocl::KernelArg::Local (lszscan ),
1413
1413
ocl::KernelArg::WriteOnlyNoSize (groupedSum));
1414
1414
1415
1415
if (!kscan.run (3 , globalSize, localSize, true ))
@@ -1422,12 +1422,6 @@ void TSDFVolumeGPU::fetchPointsNormals(OutputArray points, OutputArray normals)
1422
1422
1423
1423
// 2. fill output arrays according to per-group points count
1424
1424
1425
- ocl::Kernel kfill;
1426
- kfill.create (" fillPtsNrm" , source, options, &errorStr);
1427
-
1428
- if (kfill.empty ())
1429
- throw std::runtime_error (" Failed to create kernel: " + errorStr);
1430
-
1431
1425
points.create (gpuSum, 1 , POINT_TYPE);
1432
1426
UMat pts = points.getUMat ();
1433
1427
UMat nrm;
@@ -1438,31 +1432,41 @@ void TSDFVolumeGPU::fetchPointsNormals(OutputArray points, OutputArray normals)
1438
1432
}
1439
1433
else
1440
1434
{
1441
- // it won't access but empty args are forbidden
1435
+ // it won't be accessed but empty args are forbidden
1442
1436
nrm = UMat (1 , 1 , POINT_TYPE);
1443
1437
}
1444
- UMat atomicCtr (1 , 1 , CV_32S, Scalar (0 ));
1445
-
1446
- // mem size to keep pts (and normals optionally) for all work-items in a group
1447
- lsz = localSize[0 ]*localSize[1 ]*localSize[2 ]*elemSize;
1448
1438
1449
- kfill.args (ocl::KernelArg::PtrReadOnly (volume),
1450
- volResGpu.val ,
1451
- volDims.val ,
1452
- neighbourCoords.val ,
1453
- ocl::KernelArg::PtrReadOnly (volPoseGpu),
1454
- voxelSize,
1455
- voxelSizeInv,
1456
- ((int )needNormals),
1457
- ocl::KernelArg::Local (lsz),
1458
- ocl::KernelArg::PtrReadWrite (atomicCtr),
1459
- ocl::KernelArg::ReadOnlyNoSize (groupedSum),
1460
- ocl::KernelArg::WriteOnlyNoSize (pts),
1461
- ocl::KernelArg::WriteOnlyNoSize (nrm)
1462
- );
1463
-
1464
- if (!kfill.run (3 , globalSize, localSize, true ))
1465
- throw std::runtime_error (" Failed to run kernel" );
1439
+ if (gpuSum)
1440
+ {
1441
+ ocl::Kernel kfill;
1442
+ kfill.create (" fillPtsNrm" , source, options, &errorStr);
1443
+
1444
+ if (kfill.empty ())
1445
+ throw std::runtime_error (" Failed to create kernel: " + errorStr);
1446
+
1447
+ UMat atomicCtr (1 , 1 , CV_32S, Scalar (0 ));
1448
+
1449
+ // mem size to keep pts (and normals optionally) for all work-items in a group
1450
+ size_t lszfill = localSize[0 ]*localSize[1 ]*localSize[2 ]*elemSize;
1451
+
1452
+ kfill.args (ocl::KernelArg::PtrReadOnly (volume),
1453
+ volResGpu.val ,
1454
+ volDims.val ,
1455
+ neighbourCoords.val ,
1456
+ ocl::KernelArg::PtrReadOnly (volPoseGpu),
1457
+ voxelSize,
1458
+ voxelSizeInv,
1459
+ ((int )needNormals),
1460
+ ocl::KernelArg::Local (lszfill),
1461
+ ocl::KernelArg::PtrReadWrite (atomicCtr),
1462
+ ocl::KernelArg::ReadOnlyNoSize (groupedSum),
1463
+ ocl::KernelArg::WriteOnlyNoSize (pts),
1464
+ ocl::KernelArg::WriteOnlyNoSize (nrm)
1465
+ );
1466
+
1467
+ if (!kfill.run (3 , globalSize, localSize, true ))
1468
+ throw std::runtime_error (" Failed to run kernel" );
1469
+ }
1466
1470
}
1467
1471
}
1468
1472
0 commit comments