Skip to content

Commit 58ac490

Browse files
committed
perf: improve image/jpeg decoder performance
Improve JPEG decoder performance by about 10-14% by unrolling the shift-clamp and unzig loops.
1 parent 8028731 commit 58ac490

File tree

2 files changed

+141
-17
lines changed

2 files changed

+141
-17
lines changed

src/image/jpeg/reader.go

+1 -1
Original file line number | Diff line number | Diff line change
@@ -75,7 +75,7 @@ const (
7575
// unzig maps from the zig-zag ordering to the natural ordering. For example,
7676
// unzig[3] is the column and row of the fourth element in zig-zag order. The
7777
// value is 16, which means first column (16%8 == 0) and third row (16/8 == 2).
78-
var unzig = [blockSize]int{
78+
var unzig = [blockSize]uint8{
7979
0, 1, 8, 16, 9, 2, 3, 10,
8080
17, 24, 32, 25, 18, 11, 4, 5,
8181
12, 19, 26, 33, 40, 48, 41, 34,

src/image/jpeg/scan.go

+140 -16
Original file line number | Diff line number | Diff line change
@@ -465,9 +465,73 @@ func (d *decoder) reconstructProgressiveImage() error {
465465
// to the image.
466466
func (d *decoder) reconstructBlock(b *block, bx, by, compIndex int) error {
467467
qt := &d.quant[d.comp[compIndex].tq]
468-
for zig := 0; zig < blockSize; zig++ {
469-
b[unzig[zig]] *= qt[zig]
470-
}
468+
469+
// This sequence exactly follows the indexes of the unzig mapping.
470+
b[0] *= qt[0]
471+
b[1] *= qt[1]
472+
b[8] *= qt[2]
473+
b[16] *= qt[3]
474+
b[9] *= qt[4]
475+
b[2] *= qt[5]
476+
b[3] *= qt[6]
477+
b[10] *= qt[7]
478+
b[17] *= qt[8]
479+
b[24] *= qt[9]
480+
b[32] *= qt[10]
481+
b[25] *= qt[11]
482+
b[18] *= qt[12]
483+
b[11] *= qt[13]
484+
b[4] *= qt[14]
485+
b[5] *= qt[15]
486+
b[12] *= qt[16]
487+
b[19] *= qt[17]
488+
b[26] *= qt[18]
489+
b[33] *= qt[19]
490+
b[40] *= qt[20]
491+
b[48] *= qt[21]
492+
b[41] *= qt[22]
493+
b[34] *= qt[23]
494+
b[27] *= qt[24]
495+
b[20] *= qt[25]
496+
b[13] *= qt[26]
497+
b[6] *= qt[27]
498+
b[7] *= qt[28]
499+
b[14] *= qt[29]
500+
b[21] *= qt[30]
501+
b[28] *= qt[31]
502+
b[35] *= qt[32]
503+
b[42] *= qt[33]
504+
b[49] *= qt[34]
505+
b[56] *= qt[35]
506+
b[57] *= qt[36]
507+
b[50] *= qt[37]
508+
b[43] *= qt[38]
509+
b[36] *= qt[39]
510+
b[29] *= qt[40]
511+
b[22] *= qt[41]
512+
b[15] *= qt[42]
513+
b[23] *= qt[43]
514+
b[30] *= qt[44]
515+
b[37] *= qt[45]
516+
b[44] *= qt[46]
517+
b[51] *= qt[47]
518+
b[58] *= qt[48]
519+
b[59] *= qt[49]
520+
b[52] *= qt[50]
521+
b[45] *= qt[51]
522+
b[38] *= qt[52]
523+
b[31] *= qt[53]
524+
b[39] *= qt[54]
525+
b[46] *= qt[55]
526+
b[53] *= qt[56]
527+
b[60] *= qt[57]
528+
b[61] *= qt[58]
529+
b[54] *= qt[59]
530+
b[47] *= qt[60]
531+
b[55] *= qt[61]
532+
b[62] *= qt[62]
533+
b[63] *= qt[63]
534+
471535
idct(b)
472536
dst, stride := []byte(nil), 0
473537
if d.nComp == 1 {
@@ -486,22 +550,82 @@ func (d *decoder) reconstructBlock(b *block, bx, by, compIndex int) error {
486550
return UnsupportedError("too many components")
487551
}
488552
}
553+
489554
// Level shift by +128, clip to [0, 255], and write to dst.
490-
for y := 0; y < 8; y++ {
491-
y8 := y * 8
492-
yStride := y * stride
493-
for x := 0; x < 8; x++ {
494-
c := b[y8+x]
495-
if c < -128 {
496-
c = 0
497-
} else if c > 127 {
498-
c = 255
499-
} else {
500-
c += 128
501-
}
502-
dst[yStride+x] = uint8(c)
555+
writeDst := func(index int) {
556+
c := (*b)[index] + 128
557+
if c < 0 {
558+
c = 0
559+
} else if c > 255 {
560+
c = 255
503561
}
562+
dst[(index/8)*stride+(index%8)] = uint8(c)
504563
}
564+
writeDst(0)
565+
writeDst(1)
566+
writeDst(2)
567+
writeDst(3)
568+
writeDst(4)
569+
writeDst(5)
570+
writeDst(6)
571+
writeDst(7)
572+
writeDst(8)
573+
writeDst(9)
574+
writeDst(10)
575+
writeDst(11)
576+
writeDst(12)
577+
writeDst(13)
578+
writeDst(14)
579+
writeDst(15)
580+
writeDst(16)
581+
writeDst(17)
582+
writeDst(18)
583+
writeDst(19)
584+
writeDst(20)
585+
writeDst(21)
586+
writeDst(22)
587+
writeDst(23)
588+
writeDst(24)
589+
writeDst(25)
590+
writeDst(26)
591+
writeDst(27)
592+
writeDst(28)
593+
writeDst(29)
594+
writeDst(30)
595+
writeDst(31)
596+
writeDst(32)
597+
writeDst(33)
598+
writeDst(34)
599+
writeDst(35)
600+
writeDst(36)
601+
writeDst(37)
602+
writeDst(38)
603+
writeDst(39)
604+
writeDst(40)
605+
writeDst(41)
606+
writeDst(42)
607+
writeDst(43)
608+
writeDst(44)
609+
writeDst(45)
610+
writeDst(46)
611+
writeDst(47)
612+
writeDst(48)
613+
writeDst(49)
614+
writeDst(50)
615+
writeDst(51)
616+
writeDst(52)
617+
writeDst(53)
618+
writeDst(54)
619+
writeDst(55)
620+
writeDst(56)
621+
writeDst(57)
622+
writeDst(58)
623+
writeDst(59)
624+
writeDst(60)
625+
writeDst(61)
626+
writeDst(62)
627+
writeDst(63)
628+
505629
return nil
506630
}
507631

0 commit comments

Comments
 (0)