@@ -102,6 +102,34 @@ namespace
102
102
cudaSafeCall ( cudaDeviceSynchronize () );
103
103
}
104
104
};
105
+
106
+ template <int DEPTH> struct NppMirrorIFunc
107
+ {
108
+ typedef typename NppTypeTraits<DEPTH>::npp_t npp_t ;
109
+
110
+ typedef NppStatus (*func_t )(npp_t * pSrcDst, int nSrcDstStep, NppiSize oROI, NppiAxis flip);
111
+ };
112
+
113
+ template <int DEPTH, typename NppMirrorIFunc<DEPTH>::func_t func> struct NppMirrorI
114
+ {
115
+ typedef typename NppMirrorIFunc<DEPTH>::npp_t npp_t ;
116
+
117
+ static void call (GpuMat& srcDst, int flipCode, cudaStream_t stream)
118
+ {
119
+ NppStreamHandler h (stream);
120
+
121
+ NppiSize sz;
122
+ sz.width = srcDst.cols ;
123
+ sz.height = srcDst.rows ;
124
+
125
+ nppSafeCall ( func (srcDst.ptr <npp_t >(), static_cast <int >(srcDst.step ),
126
+ sz,
127
+ (flipCode == 0 ? NPP_HORIZONTAL_AXIS : (flipCode > 0 ? NPP_VERTICAL_AXIS : NPP_BOTH_AXIS))) );
128
+
129
+ if (stream == 0 )
130
+ cudaSafeCall ( cudaDeviceSynchronize () );
131
+ }
132
+ };
105
133
}
106
134
107
135
void cv::cuda::flip (InputArray _src, OutputArray _dst, int flipCode, Stream& stream)
@@ -117,6 +145,17 @@ void cv::cuda::flip(InputArray _src, OutputArray _dst, int flipCode, Stream& str
117
145
{NppMirror<CV_32F, nppiMirror_32f_C1R>::call, 0 , NppMirror<CV_32F, nppiMirror_32f_C3R>::call, NppMirror<CV_32F, nppiMirror_32f_C4R>::call}
118
146
};
119
147
148
+ typedef void (*ifunc_t )(GpuMat& srcDst, int flipCode, cudaStream_t stream);
149
+ static const ifunc_t ifuncs[6 ][4 ] =
150
+ {
151
+ {NppMirrorI<CV_8U, nppiMirror_8u_C1IR>::call, 0 , NppMirrorI<CV_8U, nppiMirror_8u_C3IR>::call, NppMirrorI<CV_8U, nppiMirror_8u_C4IR>::call},
152
+ {0 ,0 ,0 ,0 },
153
+ {NppMirrorI<CV_16U, nppiMirror_16u_C1IR>::call, 0 , NppMirrorI<CV_16U, nppiMirror_16u_C3IR>::call, NppMirrorI<CV_16U, nppiMirror_16u_C4IR>::call},
154
+ {0 ,0 ,0 ,0 },
155
+ {NppMirrorI<CV_32S, nppiMirror_32s_C1IR>::call, 0 , NppMirrorI<CV_32S, nppiMirror_32s_C3IR>::call, NppMirrorI<CV_32S, nppiMirror_32s_C4IR>::call},
156
+ {NppMirrorI<CV_32F, nppiMirror_32f_C1IR>::call, 0 , NppMirrorI<CV_32F, nppiMirror_32f_C3IR>::call, NppMirrorI<CV_32F, nppiMirror_32f_C4IR>::call}
157
+ };
158
+
120
159
GpuMat src = getInputMat (_src, stream);
121
160
122
161
CV_Assert (src.depth () == CV_8U || src.depth () == CV_16U || src.depth () == CV_32S || src.depth () == CV_32F);
@@ -125,7 +164,10 @@ void cv::cuda::flip(InputArray _src, OutputArray _dst, int flipCode, Stream& str
125
164
_dst.create (src.size (), src.type ());
126
165
GpuMat dst = getOutputMat (_dst, src.size (), src.type (), stream);
127
166
128
- funcs[src.depth ()][src.channels () - 1 ](src, dst, flipCode, StreamAccessor::getStream (stream));
167
+ if (src.refcount != dst.refcount )
168
+ funcs[src.depth ()][src.channels () - 1 ](src, dst, flipCode, StreamAccessor::getStream (stream));
169
+ else // in-place
170
+ ifuncs[src.depth ()][src.channels () - 1 ](src, flipCode, StreamAccessor::getStream (stream));
129
171
130
172
syncOutput (dst, _dst, stream);
131
173
}
0 commit comments