Skip to content

Add FastCV DSP Initialization, QcAllocator and FastCV DSP Extension APIs #3931

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: 4.x
Choose a base branch
from
2 changes: 1 addition & 1 deletion modules/fastcv/include/opencv2/fastcv/blur_dsp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ namespace dsp {
*
* @sa Filter2D
*/
CV_EXPORTS_W void filter2D(InputArray _src, OutputArray _dst, int ddepth, InputArray _kernel);
CV_EXPORTS void filter2D(InputArray _src, OutputArray _dst, int ddepth, InputArray _kernel);

//! @}
} // dsp::
Expand Down
4 changes: 2 additions & 2 deletions modules/fastcv/include/opencv2/fastcv/dsp_init.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ namespace dsp {
*
* @return int Returns 0 on success, and a non-zero value on failure.
*/
CV_EXPORTS_W int fcvdspinit();
CV_EXPORTS int fcvdspinit();

/**
* @brief Deinitializes the FastCV DSP environment.
Expand All @@ -39,7 +39,7 @@ CV_EXPORTS_W int fcvdspinit();
* @note This function must be called at the end of the use case or program, after all DSP-related
* operations are complete.
*/
CV_EXPORTS_W void fcvdspdeinit();
CV_EXPORTS void fcvdspdeinit();
//! @}

} // dsp::
Expand Down
2 changes: 1 addition & 1 deletion modules/fastcv/include/opencv2/fastcv/edges_dsp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ namespace dsp {
* @param apertureSize The Sobel kernel size for calculating gradient. Supported sizes are 3, 5 and 7.
* @param L2gradient L2 Gradient or L1 Gradient
*/
CV_EXPORTS_W void canny(InputArray _src, OutputArray _dst, int lowThreshold, int highThreshold, int apertureSize = 3, bool L2gradient = false);
CV_EXPORTS void Canny(InputArray _src, OutputArray _dst, int lowThreshold, int highThreshold, int apertureSize = 3, bool L2gradient = false);
//! @}

} // dsp::
Expand Down
4 changes: 2 additions & 2 deletions modules/fastcv/include/opencv2/fastcv/fft_dsp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ namespace dsp {
* @param dst The computed FFT matrix of type CV_32FC2. The FFT Re and Im coefficients are stored in different channels.
Hence the dimensions of the dst are (srcWidth, srcHeight)
*/
CV_EXPORTS_W void FFT(InputArray src, OutputArray dst);
CV_EXPORTS void FFT(InputArray src, OutputArray dst);

/**
* @brief Computes the 1D or 2D Inverse Fast Fourier Transform of a complex valued matrix.
Expand All @@ -38,7 +38,7 @@ CV_EXPORTS_W void FFT(InputArray src, OutputArray dst);
* @param dst The computed IFFT matrix of type CV_8U. The matrix is real valued and has no imaginary components.
Hence the dimensions of the dst are (srcWidth , srcHeight)
*/
CV_EXPORTS_W void IFFT(InputArray src, OutputArray dst);
CV_EXPORTS void IFFT(InputArray src, OutputArray dst);

//! @}

Expand Down
2 changes: 1 addition & 1 deletion modules/fastcv/include/opencv2/fastcv/sad_dsp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ namespace dsp {
* @param _src The input image data, type CV_8UC1
* @param _dst The output image data, type CV_16UC1
*/
CV_EXPORTS_W void sumOfAbsoluteDiffs(cv::InputArray _patch, cv::InputArray _src, cv::OutputArray _dst);
CV_EXPORTS void sumOfAbsoluteDiffs(cv::InputArray _patch, cv::InputArray _src, cv::OutputArray _dst);
//! @}

} // dsp::
Expand Down
36 changes: 18 additions & 18 deletions modules/fastcv/include/opencv2/fastcv/thresh_dsp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,26 @@ namespace cv {
namespace fastcv {
namespace dsp {

//! @addtogroup fastcv
//! @{
//! @addtogroup fastcv
//! @{

/**
* @brief Binarizes a grayscale image using Otsu's method.
* Sets the pixel to max(255) if its value is greater than the threshold;
* else, sets the pixel to min(0). The threshold is chosen to minimize
* the intra-class variance (the variance within the class).
*
* @param _src Input 8-bit grayscale image. Size of buffer is srcStride*srcHeight bytes.
* @param _dst Output 8-bit binarized image. Size of buffer is dstStride*srcHeight bytes.
* @param type Threshold type that can be either 0 or 1.
* NOTE: For threshold type=0, the pixel is set as
* maxValue if its value is greater than the threshold; else, it is set as zero.
* For threshold type=1, the pixel is set as zero if its
* value is greater than the threshold; else, it is set as maxValue.
*/
CV_EXPORTS_W void thresholdOtsu(InputArray _src, OutputArray _dst, bool type);
/**
* @brief Binarizes a grayscale image using Otsu's method.
* Sets the pixel to max(255) if its value is greater than the threshold;
* else, sets the pixel to min(0). The threshold is chosen to minimize
* the intra-class variance (the variance within the class).
*
* @param _src Input 8-bit grayscale image. Size of buffer is srcStride*srcHeight bytes.
* @param _dst Output 8-bit binarized image. Size of buffer is dstStride*srcHeight bytes.
* @param type Threshold type that can be either 0 or 1.
* NOTE: For threshold type=0, the pixel is set as
* maxValue if its value is greater than the threshold; else, it is set as zero.
* For threshold type=1, the pixel is set as zero if its
* value is greater than the threshold; else, it is set as maxValue.
*/
CV_EXPORTS void thresholdOtsu(InputArray _src, OutputArray _dst, bool type);

//! @}
//! @}
} // dsp::
} // fastcv::
} // cv::
Expand Down
2 changes: 1 addition & 1 deletion modules/fastcv/perf/perf_edges_dsp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ PERF_TEST_P(CannyPerfTest, run,
while (next())
{
startTimer();
cv::fastcv::dsp::canny(src, dst, lowThreshold, highThreshold, apertureSize, L2gradient);
cv::fastcv::dsp::Canny(src, dst, lowThreshold, highThreshold, apertureSize, L2gradient);
stopTimer();
}

Expand Down
8 changes: 0 additions & 8 deletions modules/fastcv/src/allocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,6 @@ cv::UMatData* QcAllocator::allocate(int dims, const int* sizes, int type,
u->size = total;
if(data0)
u->flags |= cv::UMatData::USER_ALLOCATED;

u->userdata = new std::string("QCOM");

// Add to active allocations
cv::fastcv::QcResourceManager::getInstance().addAllocation(data);
Expand Down Expand Up @@ -94,12 +92,6 @@ void QcAllocator::deallocate(cv::UMatData* u) const
u->origdata = 0;
}

if (u->userdata)
{
delete static_cast<std::string*>(u->userdata);
u->userdata = nullptr;
}

delete u;
}

Expand Down
90 changes: 11 additions & 79 deletions modules/fastcv/src/blur_dsp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,75 +9,6 @@ namespace cv {
namespace fastcv {
namespace dsp {

// Parallel loop body that applies a FastCV NxN correlation filter to one horizontal
// stripe of rows per invocation. Each stripe is extended by halfKsize rows above and/or
// below (subject to the bounds checks in operator()) before filtering, and only the rows
// that belong to the stripe itself are copied into the destination image.
class FcvFilter2DLoop_Invoker : public ParallelLoopBody
{
public:

// Stores references to the source, destination and kernel matrices and caches the
// image dimensions, the kernel width (passed as N to the FastCV calls) and half of it.
// The matrices are held by reference, so they must outlive this object.
FcvFilter2DLoop_Invoker(const Mat& _src, Mat& _dst, const Mat& _kernel) :
ParallelLoopBody(), src(_src), dst(_dst), kernel(_kernel)
{
width = src.cols;
height = src.rows;
ksize = kernel.size().width;
halfKsize = ksize/2;
}

// Filters the rows in `range` (range.end - range.start rows, starting at range.start)
// and writes the result into the matching rows of dst.
virtual void operator()(const Range& range) const CV_OVERRIDE
{
// topLines: number of extra rows included above the stripe.
// paddedHeight: total number of rows handed to the filter (stripe + extra rows).
int topLines = 0;
int rangeHeight = range.end-range.start;
int paddedHeight = rangeHeight;

// Include halfKsize extra rows above the stripe when that many rows precede it.
if(range.start >= halfKsize)
{
topLines += halfKsize;
paddedHeight += halfKsize;
}

// Include halfKsize extra rows below the stripe when that many rows follow it.
if(range.end <= height-halfKsize)
{
paddedHeight += halfKsize;
}

Mat srcPadded, dstPadded;
// NOTE(review): srcPadded is reassigned to a view of src a few lines below; whether the
// allocator set here is still in effect afterwards depends on cv::Mat assignment
// semantics — confirm.
srcPadded.allocator = cv::fastcv::getQcAllocator();
dstPadded.allocator = cv::fastcv::getQcAllocator();

// srcPadded is a sub-rectangle of src covering the stripe plus the extra rows;
// dstPadded is a freshly created buffer of the same size with dst's depth.
srcPadded = src(Rect(0, range.start - topLines, width, paddedHeight));
dstPadded.create(paddedHeight, width, dst.depth());

// IS_FASTCV_ALLOCATED is defined outside this block; both buffers are checked with it
// before being passed to the FastCV calls.
CV_Assert(IS_FASTCV_ALLOCATED(srcPadded));
CV_Assert(IS_FASTCV_ALLOCATED(dstPadded));

// Choose the FastCV routine from the destination depth. The kernel data is reinterpreted
// as int8_t for CV_8U/CV_16S output and as float32_t for CV_32F output. No filter call is
// made for any other depth.
if (dst.depth() == CV_8U)
fcvFilterCorrNxNu8Q((int8_t*)kernel.data, ksize, 0, srcPadded.data, width, paddedHeight, srcPadded.step,
dstPadded.data, dstPadded.step);
else if (dst.depth() == CV_16S)
fcvFilterCorrNxNu8s16Q((int8_t*)kernel.data, ksize, 0, srcPadded.data, width, paddedHeight, srcPadded.step,
(int16_t*)dstPadded.data, dstPadded.step);
else if (dst.depth() == CV_32F)
fcvFilterCorrNxNu8f32Q((float32_t*)kernel.data, ksize, srcPadded.data, width, paddedHeight, srcPadded.step,
(float32_t*)dstPadded.data, dstPadded.step);

// Only copy center part back to output image and ignore the padded lines
Mat temp1 = dstPadded(Rect(0, topLines, width, rangeHeight));
Mat temp2 = dst(Rect(0, range.start, width, rangeHeight));
temp1.copyTo(temp2);
}

private:
const Mat& src;      // input image (not owned)
Mat& dst;            // output image (not owned); written one stripe at a time
const Mat& kernel;   // filter kernel (not owned)
int width;           // src.cols
int height;          // src.rows
int ksize;           // kernel width
int halfKsize;       // ksize/2 (integer division)

// Copy construction and copy assignment are declared but not defined here, making the
// class non-copyable in practice.
FcvFilter2DLoop_Invoker(const FcvFilter2DLoop_Invoker &); // = delete;
const FcvFilter2DLoop_Invoker& operator= (const FcvFilter2DLoop_Invoker &); // = delete;
};

void filter2D(InputArray _src, OutputArray _dst, int ddepth, InputArray _kernel)
{
CV_Assert(
Expand All @@ -103,25 +34,25 @@ void filter2D(InputArray _src, OutputArray _dst, int ddepth, InputArray _kernel)
// Check DSP initialization status and initialize if needed
FASTCV_CHECK_DSP_INIT();

int nThreads = getNumThreads();
int nStripes = (nThreads > 1) ? ((src.rows > 60) ? 3 * nThreads : 1) : 1;

if (ddepth == CV_8U && ksize.width == 3)
fcvFilterCorr3x3s8_v2Q((int8_t*)kernel.data, src.data, src.cols, src.rows, src.step, dst.data, dst.step);

switch (ddepth)
{
case CV_8U:
{
if(ksize.width == 3)
fcvFilterCorr3x3s8_v2Q((int8_t*)kernel.data, src.data, src.cols, src.rows, src.step, dst.data, dst.step);
else
fcvFilterCorrNxNu8Q((int8_t*)kernel.data, ksize.width, 0, src.data, src.cols, src.rows, src.step, dst.data, dst.step);

break;
}
case CV_16S:
{
CV_Assert(CV_MAT_DEPTH(kernel.type()) == CV_8S);
parallel_for_(Range(0, src.rows), FcvFilter2DLoop_Invoker(src, dst, kernel), nStripes);
fcvFilterCorrNxNu8s16Q((int8_t*)kernel.data, ksize.width, 0, src.data, src.cols, src.rows, src.step, (int16_t*)dst.data, dst.step);
break;
}
case CV_32F:
{
CV_Assert(CV_MAT_DEPTH(kernel.type()) == CV_32F);
parallel_for_(Range(0, src.rows), FcvFilter2DLoop_Invoker(src, dst, kernel), nStripes);
fcvFilterCorrNxNu8f32Q((float32_t*)kernel.data, ksize.width, src.data, src.cols, src.rows, src.step, (float32_t*)dst.data, dst.step);
break;
}
default:
Expand All @@ -131,6 +62,7 @@ void filter2D(InputArray _src, OutputArray _dst, int ddepth, InputArray _kernel)
break;
}
}

}

} // dsp::
Expand Down
2 changes: 1 addition & 1 deletion modules/fastcv/src/edges_dsp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ namespace cv {
namespace fastcv {
namespace dsp {

void canny(InputArray _src, OutputArray _dst, int lowThreshold, int highThreshold, int apertureSize, bool L2gradient)
void Canny(InputArray _src, OutputArray _dst, int lowThreshold, int highThreshold, int apertureSize, bool L2gradient)
{
CV_Assert(
!_src.empty() &&
Expand Down
3 changes: 1 addition & 2 deletions modules/fastcv/src/precomp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,7 @@ namespace dsp {
struct FastCvDspContext;

#define IS_FASTCV_ALLOCATED(mat) \
((mat.u && mat.u->userdata && \
*static_cast<std::string*>(mat.u->userdata) == "QCOM") ? true : \
((mat.allocator == cv::fastcv::getQcAllocator()) ? true : \
(CV_Error(cv::Error::StsBadArg, cv::format("Matrix '%s' not allocated with FastCV allocator. " \
"Please ensure that the matrix is created using " \
"cv::fastcv::getQcAllocator().", #mat)), false))
Expand Down
2 changes: 1 addition & 1 deletion modules/fastcv/test/test_blur_dsp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ TEST_P(Filter2DTest_DSP, accuracy)
}

INSTANTIATE_TEST_CASE_P(FastCV_Extension, Filter2DTest_DSP, Combine(
/*image size*/ Values(perf::szVGA, perf::sz720p, perf::sz1080p),
/*image size*/ Values(perf::szVGA, perf::sz720p),
/*dst depth*/ Values(CV_8U,CV_16S,CV_32F),
/*kernel size*/ Values(3, 5, 7, 9, 11)
));
Expand Down
2 changes: 1 addition & 1 deletion modules/fastcv/test/test_edges_dsp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ TEST(DSP_CannyTest, accuracy)
int lowThreshold = 0;
int highThreshold = 150;

cv::fastcv::dsp::canny(src, dst, lowThreshold, highThreshold, 3, true);
cv::fastcv::dsp::Canny(src, dst, lowThreshold, highThreshold, 3, true);

//De-Initialize DSP
cv::fastcv::dsp::fcvdspdeinit();
Expand Down
Loading