Skip to content

Commit 3c8df63

Browse files
author
tingbo.liao
committed
Further rearranged the rotm kernel for the different architectures.
Signed-off-by: tingbo.liao <[email protected]>
1 parent 4e817f8 commit 3c8df63

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

41 files changed

+770
-141
lines changed

cmake/kernel.cmake

+3
Original file line number | Diff line number | Diff line change
@@ -79,6 +79,9 @@ macro(SetDefaultL1)
7979
SetFallback(CROTKERNEL zrot.S)
8080
SetFallback(ZROTKERNEL zrot.S)
8181
SetFallback(XROTKERNEL zrot.S)
82+
SetFallback(SROTMKERNEL rotm.S)
83+
SetFallback(DROTMKERNEL rotm.S)
84+
SetFallback(QROTMKERNEL rotm.S)
8285
SetFallback(SSCALKERNEL scal.S)
8386
SetFallback(DSCALKERNEL scal.S)
8487
SetFallback(CSCALKERNEL zscal.S)

common_d.h

+2
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
#define DSUM_K dsum_k
2323
#define DSWAP_K dswap_k
2424
#define DROT_K drot_k
25+
#define DROTM_K drotm_k
2526

2627
#define DGEMV_N dgemv_n
2728
#define DGEMV_T dgemv_t
@@ -180,6 +181,7 @@
180181
#define DSUM_K gotoblas -> dsum_k
181182
#define DSWAP_K gotoblas -> dswap_k
182183
#define DROT_K gotoblas -> drot_k
184+
#define DROTM_K gotoblas -> drotm_k
183185

184186
#define DGEMV_N gotoblas -> dgemv_n
185187
#define DGEMV_T gotoblas -> dgemv_t

common_level1.h

+3-3
Original file line number | Diff line number | Diff line change
@@ -213,9 +213,9 @@ int srotmg_k(float *, float *, float *, float *, float *);
213213
int drotmg_k(double *, double *, double *, double *, double *);
214214
int qrotmg_k(xdouble *, xdouble *, xdouble *, xdouble *, xdouble *);
215215

216-
int srotm_k (BLASLONG, float, BLASLONG, float, BLASLONG, float);
217-
int drotm_k (BLASLONG, double, BLASLONG, double, BLASLONG, double);
218-
int qrotm_k (BLASLONG, xdouble, BLASLONG, xdouble, BLASLONG, xdouble);
216+
int srotm_k (BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
217+
int drotm_k (BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
218+
int qrotm_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *);
219219

220220

221221
int saxpby_k (BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG);

common_macro.h

+3
Original file line number | Diff line number | Diff line change
@@ -70,6 +70,7 @@
7070
#define SUM_K QSUM_K
7171
#define SWAP_K QSWAP_K
7272
#define ROT_K QROT_K
73+
#define ROTM_K QROTM_K
7374

7475
#define GEMV_N QGEMV_N
7576
#define GEMV_T QGEMV_T
@@ -361,6 +362,7 @@
361362
#define SUM_K DSUM_K
362363
#define SWAP_K DSWAP_K
363364
#define ROT_K DROT_K
365+
#define ROTM_K DROTM_K
364366

365367
#define GEMV_N DGEMV_N
366368
#define GEMV_T DGEMV_T
@@ -977,6 +979,7 @@
977979
#define SUM_K SSUM_K
978980
#define SWAP_K SSWAP_K
979981
#define ROT_K SROT_K
982+
#define ROTM_K SROTM_K
980983

981984
#define GEMV_N SGEMV_N
982985
#define GEMV_T SGEMV_T

common_param.h

+3
Original file line number | Diff line number | Diff line change
@@ -197,6 +197,7 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
197197
//double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
198198

199199
int (*srot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float);
200+
int (*srotm_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
200201
#endif
201202
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
202203
int (*saxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
@@ -330,6 +331,7 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
330331
#endif
331332
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
332333
int (*drot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG, double, double);
334+
int (*drotm_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
333335
int (*daxpy_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
334336
int (*dscal_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
335337
int (*dswap_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
@@ -439,6 +441,7 @@ BLASLONG (*iqmin_k) (BLASLONG, xdouble *, BLASLONG);
439441
int (*qcopy_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
440442
xdouble (*qdot_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
441443
int (*qrot_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble, xdouble);
444+
int (*qrotm_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *);
442445

443446
int (*qaxpy_k) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
444447
int (*qscal_k) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);

common_q.h

+2
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
#define QSUM_K qsum_k
2323
#define QSWAP_K qswap_k
2424
#define QROT_K qrot_k
25+
#define QROTM_K qrotm_k
2526

2627
#define QGEMV_N qgemv_n
2728
#define QGEMV_T qgemv_t
@@ -165,6 +166,7 @@
165166
#define QSUM_K gotoblas -> qsum_k
166167
#define QSWAP_K gotoblas -> qswap_k
167168
#define QROT_K gotoblas -> qrot_k
169+
#define QROTM_K gotoblas -> qrotm_k
168170

169171
#define QGEMV_N gotoblas -> qgemv_n
170172
#define QGEMV_T gotoblas -> qgemv_t

common_s.h

+2
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
#define SSCAL_K sscal_k
2525
#define SSWAP_K sswap_k
2626
#define SROT_K srot_k
27+
#define SROTM_K srotm_k
2728

2829
#define SGEMV_N sgemv_n
2930
#define SGEMV_T sgemv_t
@@ -189,6 +190,7 @@
189190
#define SSCAL_K gotoblas -> sscal_k
190191
#define SSWAP_K gotoblas -> sswap_k
191192
#define SROT_K gotoblas -> srot_k
193+
#define SROTM_K gotoblas -> srotm_k
192194

193195
#define SGEMV_N gotoblas -> sgemv_n
194196
#define SGEMV_T gotoblas -> sgemv_t

interface/rotm.c

+6-134
Original file line number | Diff line number | Diff line change
@@ -7,149 +7,21 @@
77

88
void NAME(blasint *N, FLOAT *dx, blasint *INCX, FLOAT *dy, blasint *INCY, FLOAT *dparam){
99

10-
blasint n = *N;
11-
blasint incx = *INCX;
12-
blasint incy = *INCY;
10+
blasint n = *N;
11+
blasint incx = *INCX;
12+
blasint incy = *INCY;
1313

14+
PRINT_DEBUG_NAME
1415
#else
1516

1617
void CNAME(blasint n, FLOAT *dx, blasint incx, FLOAT *dy, blasint incy, FLOAT *dparam){
1718

18-
#endif
19-
20-
blasint i__1, i__2;
19+
PRINT_DEBUG_CNAME;
2120

22-
blasint i__;
23-
FLOAT w, z__;
24-
blasint kx, ky;
25-
FLOAT dh11, dh12, dh22, dh21, dflag;
26-
blasint nsteps;
27-
28-
#ifndef CBLAS
29-
PRINT_DEBUG_CNAME;
30-
#else
31-
PRINT_DEBUG_CNAME;
3221
#endif
3322

34-
--dparam;
35-
--dy;
36-
--dx;
37-
38-
dflag = dparam[1];
39-
if (n <= 0 || dflag == - 2.0) goto L140;
40-
41-
if (! (incx == incy && incx > 0)) goto L70;
42-
43-
nsteps = n * incx;
44-
if (dflag < 0.) {
45-
goto L50;
46-
} else if (dflag == 0) {
47-
goto L10;
48-
} else {
49-
goto L30;
50-
}
51-
L10:
52-
dh12 = dparam[4];
53-
dh21 = dparam[3];
54-
i__1 = nsteps;
55-
i__2 = incx;
56-
for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
57-
w = dx[i__];
58-
z__ = dy[i__];
59-
dx[i__] = w + z__ * dh12;
60-
dy[i__] = w * dh21 + z__;
61-
/* L20: */
62-
}
63-
goto L140;
64-
L30:
65-
dh11 = dparam[2];
66-
dh22 = dparam[5];
67-
i__2 = nsteps;
68-
i__1 = incx;
69-
for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
70-
w = dx[i__];
71-
z__ = dy[i__];
72-
dx[i__] = w * dh11 + z__;
73-
dy[i__] = -w + dh22 * z__;
74-
/* L40: */
75-
}
76-
goto L140;
77-
L50:
78-
dh11 = dparam[2];
79-
dh12 = dparam[4];
80-
dh21 = dparam[3];
81-
dh22 = dparam[5];
82-
i__1 = nsteps;
83-
i__2 = incx;
84-
for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
85-
w = dx[i__];
86-
z__ = dy[i__];
87-
dx[i__] = w * dh11 + z__ * dh12;
88-
dy[i__] = w * dh21 + z__ * dh22;
89-
/* L60: */
90-
}
91-
goto L140;
92-
L70:
93-
kx = 1;
94-
ky = 1;
95-
if (incx < 0) {
96-
kx = (1 - n) * incx + 1;
97-
}
98-
if (incy < 0) {
99-
ky = (1 - n) * incy + 1;
100-
}
23+
ROTM_K(n, dx, incx, dy, incy, dparam);
10124

102-
if (dflag < 0.) {
103-
goto L120;
104-
} else if (dflag == 0) {
105-
goto L80;
106-
} else {
107-
goto L100;
108-
}
109-
L80:
110-
dh12 = dparam[4];
111-
dh21 = dparam[3];
112-
i__2 = n;
113-
for (i__ = 1; i__ <= i__2; ++i__) {
114-
w = dx[kx];
115-
z__ = dy[ky];
116-
dx[kx] = w + z__ * dh12;
117-
dy[ky] = w * dh21 + z__;
118-
kx += incx;
119-
ky += incy;
120-
/* L90: */
121-
}
122-
goto L140;
123-
L100:
124-
dh11 = dparam[2];
125-
dh22 = dparam[5];
126-
i__2 = n;
127-
for (i__ = 1; i__ <= i__2; ++i__) {
128-
w = dx[kx];
129-
z__ = dy[ky];
130-
dx[kx] = w * dh11 + z__;
131-
dy[ky] = -w + dh22 * z__;
132-
kx += incx;
133-
ky += incy;
134-
/* L110: */
135-
}
136-
goto L140;
137-
L120:
138-
dh11 = dparam[2];
139-
dh12 = dparam[4];
140-
dh21 = dparam[3];
141-
dh22 = dparam[5];
142-
i__2 = n;
143-
for (i__ = 1; i__ <= i__2; ++i__) {
144-
w = dx[kx];
145-
z__ = dy[ky];
146-
dx[kx] = w * dh11 + z__ * dh12;
147-
dy[ky] = w * dh21 + z__ * dh22;
148-
kx += incx;
149-
ky += incy;
150-
/* L130: */
151-
}
152-
L140:
15325
return;
15426
}
15527

kernel/CMakeLists.txt

+3
Original file line number | Diff line number | Diff line change
@@ -125,6 +125,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
125125
GenerateNamedObjects("${KERNELDIR}/${SNRM2KERNEL}" "" "nrm2_k" false "" "" false "SINGLE")
126126
GenerateNamedObjects("${KERNELDIR}/${SDOTKERNEL}" "" "dot_k" false "" "" false "SINGLE")
127127
GenerateNamedObjects("${KERNELDIR}/${SROTKERNEL}" "" "rot_k" false "" "" false "SINGLE")
128+
GenerateNamedObjects("${KERNELDIR}/${SROTMKERNEL}" "" "rotm_k" false "" "" false "SINGLE")
128129
endif ()
129130
if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE)
130131
GenerateNamedObjects("${KERNELDIR}/${DAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "DOUBLE")
@@ -148,6 +149,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
148149
GenerateNamedObjects("${KERNELDIR}/${DCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "DOUBLE")
149150
GenerateNamedObjects("${KERNELDIR}/${DNRM2KERNEL}" "" "nrm2_k" false "" "" false "DOUBLE")
150151
GenerateNamedObjects("${KERNELDIR}/${DROTKERNEL}" "" "rot_k" false "" "" false "DOUBLE")
152+
GenerateNamedObjects("${KERNELDIR}/${DROTMKERNEL}" "" "rotm_k" false "" "" false "DOUBLE")
151153
GenerateNamedObjects("${KERNELDIR}/${DDOTKERNEL}" "" "dot_k" false "" "" false "DOUBLE")
152154
GenerateNamedObjects("${KERNELDIR}/${DSWAPKERNEL}" "" "swap_k" false "" "" false "DOUBLE")
153155
GenerateNamedObjects("${KERNELDIR}/${DAXPYKERNEL}" "" "axpy_k" false "" "" false "DOUBLE")
@@ -1105,6 +1107,7 @@ endif ()
11051107
GenerateNamedObjects("${KERNELDIR}/${DCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "DOUBLE")
11061108
GenerateNamedObjects("${KERNELDIR}/${DNRM2KERNEL}" "" "nrm2_k" false "" "" false "DOUBLE")
11071109
GenerateNamedObjects("${KERNELDIR}/${DROTKERNEL}" "" "rot_k" false "" "" false "DOUBLE")
1110+
GenerateNamedObjects("${KERNELDIR}/${DROTMKERNEL}" "" "rotm_k" false "" "" false "DOUBLE")
11081111
GenerateNamedObjects("${KERNELDIR}/${DDOTKERNEL}" "" "dot_k" false "" "" false "DOUBLE")
11091112
GenerateNamedObjects("${KERNELDIR}/${DSWAPKERNEL}" "" "swap_k" false "" "" false "DOUBLE")
11101113
GenerateNamedObjects("${KERNELDIR}/${DAXPYKERNEL}" "" "axpy_k" false "" "" false "DOUBLE")

kernel/Makefile.L1

+20-2
Original file line number | Diff line number | Diff line change
@@ -336,6 +336,18 @@ ifndef XROTKERNEL
336336
XROTKERNEL = zrot.S
337337
endif
338338

339+
ifndef SROTMKERNEL
340+
SROTMKERNEL = rotm.S
341+
endif
342+
343+
ifndef DROTMKERNEL
344+
DROTMKERNEL = rotm.S
345+
endif
346+
347+
ifndef QROTMKERNEL
348+
QROTMKERNEL = rotm.S
349+
endif
350+
339351
### SCAL ###
340352

341353
ifndef SSCALKERNEL
@@ -504,14 +516,14 @@ SBLASOBJS += \
504516
sasum_k$(TSUFFIX).$(SUFFIX) ssum_k$(TSUFFIX).$(SUFFIX) saxpy_k$(TSUFFIX).$(SUFFIX) scopy_k$(TSUFFIX).$(SUFFIX) \
505517
sdot_k$(TSUFFIX).$(SUFFIX) sdsdot_k$(TSUFFIX).$(SUFFIX) dsdot_k$(TSUFFIX).$(SUFFIX) \
506518
snrm2_k$(TSUFFIX).$(SUFFIX) srot_k$(TSUFFIX).$(SUFFIX) sscal_k$(TSUFFIX).$(SUFFIX) sswap_k$(TSUFFIX).$(SUFFIX) \
507-
saxpby_k$(TSUFFIX).$(SUFFIX)
519+
saxpby_k$(TSUFFIX).$(SUFFIX) srotm_k$(TSUFFIX).$(SUFFIX)
508520

509521
DBLASOBJS += \
510522
damax_k$(TSUFFIX).$(SUFFIX) damin_k$(TSUFFIX).$(SUFFIX) dmax_k$(TSUFFIX).$(SUFFIX) dmin_k$(TSUFFIX).$(SUFFIX) \
511523
idamax_k$(TSUFFIX).$(SUFFIX) idamin_k$(TSUFFIX).$(SUFFIX) idmax_k$(TSUFFIX).$(SUFFIX) idmin_k$(TSUFFIX).$(SUFFIX) \
512524
dasum_k$(TSUFFIX).$(SUFFIX) daxpy_k$(TSUFFIX).$(SUFFIX) dcopy_k$(TSUFFIX).$(SUFFIX) ddot_k$(TSUFFIX).$(SUFFIX) \
513525
dnrm2_k$(TSUFFIX).$(SUFFIX) drot_k$(TSUFFIX).$(SUFFIX) dscal_k$(TSUFFIX).$(SUFFIX) dswap_k$(TSUFFIX).$(SUFFIX) \
514-
daxpby_k$(TSUFFIX).$(SUFFIX) dsum_k$(TSUFFIX).$(SUFFIX)
526+
daxpby_k$(TSUFFIX).$(SUFFIX) dsum_k$(TSUFFIX).$(SUFFIX) drotm_k$(TSUFFIX).$(SUFFIX)
515527

516528
QBLASOBJS += \
517529
qamax_k$(TSUFFIX).$(SUFFIX) qamin_k$(TSUFFIX).$(SUFFIX) qmax_k$(TSUFFIX).$(SUFFIX) qmin_k$(TSUFFIX).$(SUFFIX) \
@@ -841,6 +853,12 @@ $(KDIR)srot_k$(TSUFFIX).$(SUFFIX) $(KDIR)srot_k$(TPSUFFIX).$(PSUFFIX) : $(KERN
841853
$(KDIR)drot_k$(TSUFFIX).$(SUFFIX) $(KDIR)drot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DROTKERNEL)
842854
$(CC) -c $(CFLAGS) $(FMAFLAG) -UCOMPLEX -UCOMPLEX -DDOUBLE $< -o $@
843855

856+
$(KDIR)srotm_k$(TSUFFIX).$(SUFFIX) $(KDIR)srotm_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SROTMKERNEL)
857+
$(CC) -c $(CFLAGS) $(FMAFLAG) -UCOMPLEX -UCOMPLEX -UDOUBLE $< -o $@
858+
859+
$(KDIR)drotm_k$(TSUFFIX).$(SUFFIX) $(KDIR)drotm_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DROTMKERNEL)
860+
$(CC) -c $(CFLAGS) $(FMAFLAG) -UCOMPLEX -UCOMPLEX -DDOUBLE $< -o $@
861+
844862
$(KDIR)qrot_k$(TSUFFIX).$(SUFFIX) $(KDIR)qrot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QROTKERNEL)
845863
$(CC) -c $(CFLAGS) -UCOMPLEX -UCOMPLEX -DXDOUBLE $< -o $@
846864

kernel/alpha/KERNEL

+12
Original file line number | Diff line number | Diff line change
@@ -122,3 +122,15 @@ ZTRSMKERNEL_LN = ztrsm_kernel_2x2_LN.S
122122
ZTRSMKERNEL_LT = ztrsm_kernel_2x2_LT.S
123123
ZTRSMKERNEL_RN = ztrsm_kernel_2x2_LT.S
124124
ZTRSMKERNEL_RT = ztrsm_kernel_2x2_RT.S
125+
126+
ifndef SROTMKERNEL
127+
SROTMKERNEL = ../generic/rotm.c
128+
endif
129+
130+
ifndef DROTMKERNEL
131+
DROTMKERNEL = ../generic/rotm.c
132+
endif
133+
134+
ifndef QROTMKERNEL
135+
QROTMKERNEL = ../generic/rotm.c
136+
endif

kernel/arm/KERNEL

+10
Original file line number | Diff line number | Diff line change
@@ -43,4 +43,14 @@ ifndef ZGEMM_BETA
4343
ZGEMM_BETA = ../generic/zgemm_beta.c
4444
endif
4545

46+
ifndef SROTMKERNEL
47+
SROTMKERNEL = ../generic/rotm.c
48+
endif
49+
50+
ifndef DROTMKERNEL
51+
DROTMKERNEL = ../generic/rotm.c
52+
endif
4653

54+
ifndef QROTMKERNEL
55+
QROTMKERNEL = ../generic/rotm.c
56+
endif

kernel/arm64/KERNEL

+10
Original file line number | Diff line number | Diff line change
@@ -45,4 +45,14 @@ ifndef ZGEMM_BETA
4545
ZGEMM_BETA = ../generic/zgemm_beta.c
4646
endif
4747

48+
ifndef SROTMKERNEL
49+
SROTMKERNEL = ../generic/rotm.c
50+
endif
51+
52+
ifndef DROTMKERNEL
53+
DROTMKERNEL = ../generic/rotm.c
54+
endif
4855

56+
ifndef QROTMKERNEL
57+
QROTMKERNEL = ../generic/rotm.c
58+
endif

0 commit comments

Comments
 (0)