Skip to content

Commit 22cd23e

Browse files
committed
sellc: rollback on local associates, use embedded chunk kernels
1 parent 7e45901 commit 22cd23e

File tree

1 file changed

+84
-45
lines changed

1 file changed

+84
-45
lines changed

Diff for: src/stdlib_sparse_spmv.fypp

+84-45
Original file line numberDiff line numberDiff line change
@@ -418,7 +418,7 @@ contains
418418
character(1), intent(in), optional :: op
419419
${t1}$ :: alpha_
420420
character(1) :: op_
421-
integer(ilp) :: i, j, k, nz, rowidx, num_chunks, rm
421+
integer(ilp) :: i, nz, rowidx, num_chunks, rm
422422

423423
op_ = sparse_op_none; if(present(op)) op_ = op
424424
alpha_ = one_${s1}$
@@ -447,12 +447,7 @@ contains
447447
do i = 1, num_chunks
448448
nz = ia(i+1) - ia(i)
449449
rowidx = (i - 1)*${chunk}$ + 1
450-
associate(col => ja(1:${chunk}$,ia(i):ia(i)+nz-1), mat => data(1:${chunk}$,ia(i):ia(i)+nz-1), &
451-
& x => vec_x, y => vec_y(rowidx:rowidx+${chunk}$-1) )
452-
do j = 1, nz
453-
where(col(:,j) > 0) y = y + alpha_ * mat(:,j) * x(col(:,j))
454-
end do
455-
end associate
450+
call chunk_kernel_${chunk}$(nz,data(:,ia(i)),ja(:,ia(i)),vec_x,vec_y(rowidx:))
456451
end do
457452
#:endfor
458453
end select
@@ -462,12 +457,7 @@ contains
462457
i = num_chunks + 1
463458
nz = ia(i+1) - ia(i)
464459
rowidx = (i - 1)*cs + 1
465-
associate(col => ja(1:cs,ia(i):ia(i)+nz-1), mat => data(1:cs,ia(i):ia(i)+nz-1), &
466-
& x => vec_x, y => vec_y(rowidx:rowidx+rm-1) )
467-
do j = 1, nz
468-
where(col(1:rm,j) > 0) y = y + alpha_ * mat(1:rm,j) * x(col(1:rm,j))
469-
end do
470-
end associate
460+
call chunk_kernel_rm(nz,cs,rm,data(:,ia(i)),ja(:,ia(i)),vec_x,vec_y(rowidx:))
471461
end if
472462

473463
else if( storage == sparse_full .and. op_==sparse_op_transpose ) then
@@ -478,14 +468,7 @@ contains
478468
do i = 1, num_chunks
479469
nz = ia(i+1) - ia(i)
480470
rowidx = (i - 1)*${chunk}$ + 1
481-
associate(col => ja(1:${chunk}$,ia(i):ia(i)+nz-1), mat => data(1:${chunk}$,ia(i):ia(i)+nz-1), &
482-
& x => vec_x(rowidx:rowidx+${chunk}$-1), y => vec_y )
483-
do j = 1, nz
484-
do k = 1, ${chunk}$
485-
if(col(k,j) > 0) y(col(k,j)) = y(col(k,j)) + alpha_ * mat(k,j) * x(k)
486-
end do
487-
end do
488-
end associate
471+
call chunk_kernel_trans_${chunk}$(nz,data(:,ia(i)),ja(:,ia(i)),vec_x(rowidx:),vec_y)
489472
end do
490473
#:endfor
491474
end select
@@ -495,14 +478,7 @@ contains
495478
i = num_chunks + 1
496479
nz = ia(i+1) - ia(i)
497480
rowidx = (i - 1)*cs + 1
498-
associate(col => ja(1:cs,ia(i):ia(i)+nz-1), mat => data(1:cs,ia(i):ia(i)+nz-1), &
499-
& x => vec_x(rowidx:rowidx+rm-1), y => vec_y )
500-
do j = 1, nz
501-
do k = 1, rm
502-
if(col(k,j) > 0) y(col(k,j)) = y(col(k,j)) + alpha_ * mat(k,j) * x(k)
503-
end do
504-
end do
505-
end associate
481+
call chunk_kernel_rm_trans(nz,cs,rm,data(:,ia(i)),ja(:,ia(i)),vec_x(rowidx:),vec_y)
506482
end if
507483

508484
#:if t1.startswith('complex')
@@ -514,14 +490,7 @@ contains
514490
do i = 1, num_chunks
515491
nz = ia(i+1) - ia(i)
516492
rowidx = (i - 1)*${chunk}$ + 1
517-
associate(col => ja(1:${chunk}$,ia(i):ia(i)+nz-1), mat => data(1:${chunk}$,ia(i):ia(i)+nz-1), &
518-
& x => vec_x(rowidx:rowidx+${chunk}$-1), y => vec_y )
519-
do j = 1, nz
520-
do k = 1, ${chunk}$
521-
if(col(k,j) > 0) y(col(k,j)) = y(col(k,j)) + alpha_ * conjg(mat(k,j)) * x(k)
522-
end do
523-
end do
524-
end associate
493+
call chunk_kernel_herm_${chunk}$(nz,data(:,ia(i)),ja(:,ia(i)),vec_x(rowidx:),vec_y)
525494
end do
526495
#:endfor
527496
end select
@@ -531,14 +500,7 @@ contains
531500
i = num_chunks + 1
532501
nz = ia(i+1) - ia(i)
533502
rowidx = (i - 1)*cs + 1
534-
associate(col => ja(1:cs,ia(i):ia(i)+nz-1), mat => data(1:cs,ia(i):ia(i)+nz-1), &
535-
& x => vec_x(rowidx:rowidx+rm-1), y => vec_y )
536-
do j = 1, nz
537-
do k = 1, rm
538-
if(col(k,j) > 0) y(col(k,j)) = y(col(k,j)) + alpha_ * conjg(mat(k,j)) * x(k)
539-
end do
540-
end do
541-
end associate
503+
call chunk_kernel_rm_herm(nz,cs,rm,data(:,ia(i)),ja(:,ia(i)),vec_x(rowidx:),vec_y)
542504
end if
543505
#:endif
544506
else
@@ -547,6 +509,83 @@ contains
547509
end if
548510
end associate
549511

512+
contains
513+
#:for chunk in CHUNKS
514+
pure subroutine chunk_kernel_${chunk}$(n,a,col,x,y)
515+
integer, value :: n
516+
${t1}$, intent(in) :: a(${chunk}$,n), x(*)
517+
integer(ilp), intent(in) :: col(${chunk}$,n)
518+
${t1}$, intent(inout) :: y(${chunk}$)
519+
integer :: j
520+
do j = 1, n
521+
where(col(:,j) > 0) y = y + alpha_ * a(:,j) * x(col(:,j))
522+
end do
523+
end subroutine
524+
pure subroutine chunk_kernel_trans_${chunk}$(n,a,col,x,y)
525+
integer, value :: n
526+
${t1}$, intent(in) :: a(${chunk}$,n), x(${chunk}$)
527+
integer(ilp), intent(in) :: col(${chunk}$,n)
528+
${t1}$, intent(inout) :: y(*)
529+
integer :: j, k
530+
do j = 1, n
531+
do k = 1, ${chunk}$
532+
if(col(k,j) > 0) y(col(k,j)) = y(col(k,j)) + alpha_ * a(k,j) * x(k)
533+
end do
534+
end do
535+
end subroutine
536+
#:if t1.startswith('complex')
537+
pure subroutine chunk_kernel_herm_${chunk}$(n,a,col,x,y)
538+
integer, value :: n
539+
${t1}$, intent(in) :: a(${chunk}$,n), x(${chunk}$)
540+
integer(ilp), intent(in) :: col(${chunk}$,n)
541+
${t1}$, intent(inout) :: y(*)
542+
integer :: j, k
543+
do j = 1, n
544+
do k = 1, ${chunk}$
545+
if(col(k,j) > 0) y(col(k,j)) = y(col(k,j)) + alpha_ * conjg(a(k,j)) * x(k)
546+
end do
547+
end do
548+
end subroutine
549+
#:endif
550+
#:endfor
551+
552+
pure subroutine chunk_kernel_rm(n,cs,r,a,col,x,y)
553+
integer, value :: n, cs, r
554+
${t1}$, intent(in) :: a(cs,n), x(*)
555+
integer(ilp), intent(in) :: col(cs,n)
556+
${t1}$, intent(inout) :: y(r)
557+
integer :: j
558+
do j = 1, n
559+
where(col(1:r,j) > 0) y = y + alpha_ * a(1:r,j) * x(col(1:r,j))
560+
end do
561+
end subroutine
562+
pure subroutine chunk_kernel_rm_trans(n,cs,r,a,col,x,y)
563+
integer, value :: n, cs, r
564+
${t1}$, intent(in) :: a(cs,n), x(r)
565+
integer(ilp), intent(in) :: col(cs,n)
566+
${t1}$, intent(inout) :: y(*)
567+
integer :: j, k
568+
do j = 1, n
569+
do k = 1, r
570+
if(col(k,j) > 0) y(col(k,j)) = y(col(k,j)) + alpha_ * a(k,j) * x(k)
571+
end do
572+
end do
573+
end subroutine
574+
#:if t1.startswith('complex')
575+
pure subroutine chunk_kernel_rm_herm(n,cs,r,a,col,x,y)
576+
integer, value :: n, cs, r
577+
${t1}$, intent(in) :: a(cs,n), x(r)
578+
integer(ilp), intent(in) :: col(cs,n)
579+
${t1}$, intent(inout) :: y(*)
580+
integer :: j, k
581+
do j = 1, n
582+
do k = 1, r
583+
if(col(k,j) > 0) y(col(k,j)) = y(col(k,j)) + alpha_ * conjg(a(k,j)) * x(k)
584+
end do
585+
end do
586+
end subroutine
587+
#:endif
588+
550589
end subroutine
551590

552591
#:endfor

0 commit comments

Comments
 (0)