Skip to content

Commit a3b0ef6

Browse files
committed
Restore riscv64 fixes from develop branch: dot product double precision accumulation, zscal NaN handling
1 parent 1093def commit a3b0ef6

File tree

4 files changed

+26
-154
lines changed

4 files changed

+26
-154
lines changed

Diff for: Makefile.prebuild

+1
Original file line number | Diff line number | Diff line change
@@ -57,6 +57,7 @@ endif
5757

5858
ifeq ($(TARGET), CK860FV)
5959
TARGET_FLAGS = -march=ck860v -mcpu=ck860fv -mfdivdu -mhard-float
60+
endif
6061

6162
ifeq ($(TARGET), x280)
6263
TARGET_FLAGS = -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d

Diff for: kernel/riscv64/dot.c

+10
Original file line number | Diff line number | Diff line change
@@ -44,14 +44,24 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
4444
{
4545
BLASLONG i=0;
4646
BLASLONG ix=0,iy=0;
47+
48+
#if defined(DSDOT)
4749
double dot = 0.0 ;
50+
#else
51+
FLOAT dot = 0.0 ;
52+
#endif
4853

4954
if ( n < 1 ) return(dot);
5055

5156
while(i < n)
5257
{
5358

59+
#if defined(DSDOT)
60+
dot += (double) y[iy] * (double) x[ix] ;
61+
#else
5462
dot += y[iy] * x[ix] ;
63+
#endif
64+
5565
ix += inc_x ;
5666
iy += inc_y ;
5767
i++ ;

Diff for: kernel/riscv64/zscal_rvv.c

+14 -76
Original file line number | Diff line number | Diff line change
@@ -69,49 +69,26 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r,FLOAT da_i, F
6969
size_t vlmax = VSETVL_MAX;
7070
FLOAT_VX2_T vx2;
7171

72-
if(da_r == 0.0 && da_i == 0.0) {
72+
if(inc_x == 1) {
7373

74-
vr = VFMVVF_FLOAT(0.0, vlmax);
75-
vi = VFMVVF_FLOAT(0.0, vlmax);
76-
77-
if(inc_x == 1) {
78-
79-
for (size_t vl; n > 0; n -= vl, x += vl*2) {
80-
vl = VSETVL(n);
81-
vx2 = VSET_VX2(vx2, 0, vr);
82-
vx2 = VSET_VX2(vx2, 1, vi);
83-
VSSEG_FLOAT(x, vx2, vl);
84-
}
85-
86-
} else {
87-
88-
for (size_t vl; n > 0; n -= vl, x += vl*inc_x*2) {
89-
vl = VSETVL(n);
90-
vx2 = VSET_VX2(vx2, 0, vr);
91-
vx2 = VSET_VX2(vx2, 1, vi);
92-
VSSSEG_FLOAT(x, stride_x, vx2, vl);
93-
}
94-
}
95-
96-
} else if(da_r == 0.0) {
97-
98-
for (size_t vl; n > 0; n -= vl, x += vl*inc_x*2) {
74+
for (size_t vl; n > 0; n -= vl, x += vl*2) {
9975
vl = VSETVL(n);
100-
101-
vx2 = VLSSEG_FLOAT(x, stride_x, vl);
76+
77+
vx2 = VLSEG_FLOAT(x, vl);
10278
vr = VGET_VX2(vx2, 0);
10379
vi = VGET_VX2(vx2, 1);
10480

105-
vt = VFMULVF_FLOAT(vi, -da_i, vl);
106-
vi = VFMULVF_FLOAT(vr, da_i, vl);
81+
vt = VFMULVF_FLOAT(vr, da_r, vl);
82+
vt = VFNMSACVF_FLOAT(vt, da_i, vi, vl);
83+
vi = VFMULVF_FLOAT(vi, da_r, vl);
84+
vi = VFMACCVF_FLOAT(vi, da_i, vr, vl);
10785

10886
vx2 = VSET_VX2(vx2, 0, vt);
10987
vx2 = VSET_VX2(vx2, 1, vi);
110-
111-
VSSSEG_FLOAT(x, stride_x, vx2, vl);
88+
VSSEG_FLOAT(x, vx2, vl);
11289
}
11390

114-
} else if(da_i == 0.0) {
91+
} else {
11592

11693
for (size_t vl; n > 0; n -= vl, x += vl*inc_x*2) {
11794
vl = VSETVL(n);
@@ -120,54 +97,15 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r,FLOAT da_i, F
12097
vr = VGET_VX2(vx2, 0);
12198
vi = VGET_VX2(vx2, 1);
12299

123-
vr = VFMULVF_FLOAT(vr, da_r, vl);
100+
vt = VFMULVF_FLOAT(vr, da_r, vl);
101+
vt = VFNMSACVF_FLOAT(vt, da_i, vi, vl);
124102
vi = VFMULVF_FLOAT(vi, da_r, vl);
103+
vi = VFMACCVF_FLOAT(vi, da_i, vr, vl);
125104

126-
vx2 = VSET_VX2(vx2, 0, vr);
105+
vx2 = VSET_VX2(vx2, 0, vt);
127106
vx2 = VSET_VX2(vx2, 1, vi);
128107
VSSSEG_FLOAT(x, stride_x, vx2, vl);
129108
}
130-
131-
} else {
132-
133-
if(inc_x == 1) {
134-
135-
for (size_t vl; n > 0; n -= vl, x += vl*2) {
136-
vl = VSETVL(n);
137-
138-
vx2 = VLSEG_FLOAT(x, vl);
139-
vr = VGET_VX2(vx2, 0);
140-
vi = VGET_VX2(vx2, 1);
141-
142-
vt = VFMULVF_FLOAT(vr, da_r, vl);
143-
vt = VFNMSACVF_FLOAT(vt, da_i, vi, vl);
144-
vi = VFMULVF_FLOAT(vi, da_r, vl);
145-
vi = VFMACCVF_FLOAT(vi, da_i, vr, vl);
146-
147-
vx2 = VSET_VX2(vx2, 0, vt);
148-
vx2 = VSET_VX2(vx2, 1, vi);
149-
VSSEG_FLOAT(x, vx2, vl);
150-
}
151-
152-
} else {
153-
154-
for (size_t vl; n > 0; n -= vl, x += vl*inc_x*2) {
155-
vl = VSETVL(n);
156-
157-
vx2 = VLSSEG_FLOAT(x, stride_x, vl);
158-
vr = VGET_VX2(vx2, 0);
159-
vi = VGET_VX2(vx2, 1);
160-
161-
vt = VFMULVF_FLOAT(vr, da_r, vl);
162-
vt = VFNMSACVF_FLOAT(vt, da_i, vi, vl);
163-
vi = VFMULVF_FLOAT(vi, da_r, vl);
164-
vi = VFMACCVF_FLOAT(vi, da_i, vr, vl);
165-
166-
vx2 = VSET_VX2(vx2, 0, vt);
167-
vx2 = VSET_VX2(vx2, 1, vi);
168-
VSSSEG_FLOAT(x, stride_x, vx2, vl);
169-
}
170-
}
171109
}
172110

173111
return(0);

Diff for: kernel/riscv64/zscal_vector.c

+1 -78
Original file line number | Diff line number | Diff line change
@@ -59,84 +59,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r,FLOAT da_i, F
5959

6060
unsigned int gvl = 0;
6161
FLOAT_V_T vt, v0, v1;
62-
if(da_r == 0.0 && da_i == 0.0){
63-
gvl = VSETVL(n);
64-
BLASLONG stride_x = inc_x * 2 * sizeof(FLOAT);
65-
BLASLONG inc_xv = inc_x * 2 * gvl;
66-
vt = VFMVVF_FLOAT(0.0, gvl);
67-
for(i=0,j=0; i < n/(gvl*2); i++){
68-
VSSEV_FLOAT(&x[ix], stride_x, vt, gvl);
69-
VSSEV_FLOAT(&x[ix+1], stride_x, vt, gvl);
70-
VSSEV_FLOAT(&x[ix+inc_xv], stride_x, vt, gvl);
71-
VSSEV_FLOAT(&x[ix+inc_xv+1], stride_x, vt, gvl);
72-
73-
j += gvl*2;
74-
ix += inc_xv*2;
75-
}
76-
for(; j < n; ){
77-
gvl = VSETVL(n-j);
78-
VSSEV_FLOAT(&x[ix], stride_x, vt, gvl);
79-
VSSEV_FLOAT(&x[ix+1], stride_x, vt, gvl);
80-
j += gvl;
81-
ix += inc_x * 2 * gvl;
82-
}
83-
}else if(da_r == 0.0){
84-
gvl = VSETVL(n);
85-
BLASLONG stride_x = inc_x * 2 * sizeof(FLOAT);
86-
BLASLONG inc_xv = inc_x * 2 * gvl;
87-
for(i=0,j=0; i < n/gvl; i++){
88-
v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
89-
v1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
90-
91-
vt = VFMULVF_FLOAT(v1, -da_i, gvl);
92-
v1 = VFMULVF_FLOAT(v0, da_i, gvl);
93-
94-
VSSEV_FLOAT(&x[ix], stride_x, vt, gvl);
95-
VSSEV_FLOAT(&x[ix+1], stride_x, v1, gvl);
96-
97-
j += gvl;
98-
ix += inc_xv;
99-
}
100-
if(j < n){
101-
gvl = VSETVL(n-j);
102-
v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
103-
v1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
104-
105-
vt = VFMULVF_FLOAT(v1, -da_i, gvl);
106-
v1 = VFMULVF_FLOAT(v0, da_i, gvl);
107-
108-
VSSEV_FLOAT(&x[ix], stride_x, vt, gvl);
109-
VSSEV_FLOAT(&x[ix+1], stride_x, v1, gvl);
110-
}
111-
}else if(da_i == 0.0){
112-
gvl = VSETVL(n);
113-
BLASLONG stride_x = inc_x * 2 * sizeof(FLOAT);
114-
BLASLONG inc_xv = inc_x * 2 * gvl;
115-
for(i=0,j=0; i < n/gvl; i++){
116-
v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
117-
v1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
118-
119-
vt = VFMULVF_FLOAT(v0, da_r, gvl);
120-
v1 = VFMULVF_FLOAT(v1, da_r, gvl);
121-
122-
VSSEV_FLOAT(&x[ix], stride_x, vt, gvl);
123-
VSSEV_FLOAT(&x[ix+1], stride_x, v1, gvl);
124-
125-
j += gvl;
126-
ix += inc_xv;
127-
}
128-
if(j < n){
129-
gvl = VSETVL(n-j);
130-
v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
131-
v1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
132-
133-
vt = VFMULVF_FLOAT(v0, da_r, gvl);
134-
v1 = VFMULVF_FLOAT(v1, da_r, gvl);
135-
136-
VSSEV_FLOAT(&x[ix], stride_x, vt, gvl);
137-
VSSEV_FLOAT(&x[ix+1], stride_x, v1, gvl);
138-
}
139-
}else{
62+
{
14063
gvl = VSETVL(n);
14164
BLASLONG stride_x = inc_x * 2 * sizeof(FLOAT);
14265
BLASLONG inc_xv = inc_x * 2 * gvl;

0 commit comments

Comments
 (0)