Skip to content

Commit 9becf40

Browse files
aimuz and gopherbot
authored and committed
bytes, strings: add Lines, SplitSeq, SplitAfterSeq, FieldsSeq, FieldsFuncSeq
Fixes #61901. Change-Id: I4db21c91fd21079f2aa3bc81fb03dd6f40423a38 GitHub-Last-Rev: ed3df56 GitHub-Pull-Request: #67543 Reviewed-on: https://go-review.googlesource.com/c/go/+/587095 Auto-Submit: Ian Lance Taylor <[email protected]> LUCI-TryBot-Result: Go LUCI <[email protected]> Reviewed-by: Carlos Amedee <[email protected]> Reviewed-by: Ian Lance Taylor <[email protected]>
1 parent 3e3ce20 commit 9becf40

File tree

7 files changed

+439
-0
lines changed

7 files changed

+439
-0
lines changed

api/next/61901.txt

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
pkg bytes, func FieldsFuncSeq([]uint8, func(int32) bool) iter.Seq[[]uint8] #61901
2+
pkg bytes, func FieldsSeq([]uint8) iter.Seq[[]uint8] #61901
3+
pkg bytes, func Lines([]uint8) iter.Seq[[]uint8] #61901
4+
pkg bytes, func SplitAfterSeq([]uint8, []uint8) iter.Seq[[]uint8] #61901
5+
pkg bytes, func SplitSeq([]uint8, []uint8) iter.Seq[[]uint8] #61901
6+
pkg strings, func FieldsFuncSeq(string, func(int32) bool) iter.Seq[string] #61901
7+
pkg strings, func FieldsSeq(string) iter.Seq[string] #61901
8+
pkg strings, func Lines(string) iter.Seq[string] #61901
9+
pkg strings, func SplitAfterSeq(string, string) iter.Seq[string] #61901
10+
pkg strings, func SplitSeq(string, string) iter.Seq[string] #61901
+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
The [bytes] package adds several functions that work with iterators:
2+
- [Lines] returns an iterator over the
3+
newline-terminated lines in the byte slice s.
4+
- [SplitSeq] returns an iterator over
5+
all subslices of s separated by sep.
6+
- [SplitAfterSeq] returns an iterator
7+
over subslices of s split after each instance of sep.
8+
- [FieldsSeq] returns an iterator over
9+
subslices of s split around runs of whitespace characters,
10+
as defined by unicode.IsSpace.
11+
- [FieldsFuncSeq] returns an iterator
12+
over subslices of s split around runs of Unicode code points satisfying f(c).
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
The [strings] package adds several functions that work with iterators:
2+
- [Lines] returns an iterator over
3+
the newline-terminated lines in the string s.
4+
- [SplitSeq] returns an iterator over
5+
all substrings of s separated by sep.
6+
- [SplitAfterSeq] returns an iterator
7+
over substrings of s split after each instance of sep.
8+
- [FieldsSeq] returns an iterator over
9+
substrings of s split around runs of whitespace characters,
10+
as defined by unicode.IsSpace.
11+
- [FieldsFuncSeq] returns an iterator
12+
over substrings of s split around runs of Unicode code points satisfying f(c).

src/bytes/bytes_test.go

+57
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
. "bytes"
99
"fmt"
1010
"internal/testenv"
11+
"iter"
1112
"math"
1213
"math/rand"
1314
"slices"
@@ -26,6 +27,37 @@ func sliceOfString(s [][]byte) []string {
2627
return result
2728
}
2829

30+
func collect(t *testing.T, seq iter.Seq[[]byte]) [][]byte {
31+
out := slices.Collect(seq)
32+
out1 := slices.Collect(seq)
33+
if !slices.Equal(sliceOfString(out), sliceOfString(out1)) {
34+
t.Fatalf("inconsistent seq:\n%s\n%s", out, out1)
35+
}
36+
return out
37+
}
38+
39+
// LinesTest is one table entry for TestLines: an input and the exact
// sequence of lines that Lines is expected to yield for it.
type LinesTest struct {
	a string   // input text
	b []string // expected lines, each keeping its terminating newline if it has one
}

var linesTests = []LinesTest{
	// Edge cases: empty input yields nothing; an unterminated
	// single line and a lone newline each yield exactly one line.
	{a: "", b: []string{}},
	{a: "abc", b: []string{"abc"}},
	{a: "\n", b: []string{"\n"}},

	{a: "abc\nabc\n", b: []string{"abc\n", "abc\n"}},
	{a: "abc\r\nabc", b: []string{"abc\r\n", "abc"}},
	{a: "abc\r\n", b: []string{"abc\r\n"}},
	{a: "\nabc", b: []string{"\n", "abc"}},
	{a: "\nabc\n\n", b: []string{"\n", "abc\n", "\n"}},
}
51+
52+
func TestLines(t *testing.T) {
53+
for _, s := range linesTests {
54+
result := sliceOfString(slices.Collect(Lines([]byte(s.a))))
55+
if !slices.Equal(result, s.b) {
56+
t.Errorf(`slices.Collect(Lines(%q)) = %q; want %q`, s.a, result, s.b)
57+
}
58+
}
59+
}
60+
2961
// For ease of reading, the test cases use strings that are converted to byte
3062
// slices before invoking the functions.
3163

@@ -800,6 +832,14 @@ func TestSplit(t *testing.T) {
800832
t.Errorf(`Split(%q, %q, %d) = %v; want %v`, tt.s, tt.sep, tt.n, result, tt.a)
801833
continue
802834
}
835+
836+
if tt.n < 0 {
837+
b := sliceOfString(slices.Collect(SplitSeq([]byte(tt.s), []byte(tt.sep))))
838+
if !slices.Equal(b, tt.a) {
839+
t.Errorf(`collect(SplitSeq(%q, %q)) = %v; want %v`, tt.s, tt.sep, b, tt.a)
840+
}
841+
}
842+
803843
if tt.n == 0 || len(a) == 0 {
804844
continue
805845
}
@@ -859,6 +899,13 @@ func TestSplitAfter(t *testing.T) {
859899
continue
860900
}
861901

902+
if tt.n < 0 {
903+
b := sliceOfString(slices.Collect(SplitAfterSeq([]byte(tt.s), []byte(tt.sep))))
904+
if !slices.Equal(b, tt.a) {
905+
t.Errorf(`collect(SplitAfterSeq(%q, %q)) = %v; want %v`, tt.s, tt.sep, b, tt.a)
906+
}
907+
}
908+
862909
if want := tt.a[len(tt.a)-1] + "z"; string(x) != want {
863910
t.Errorf("last appended result was %s; want %s", x, want)
864911
}
@@ -912,6 +959,11 @@ func TestFields(t *testing.T) {
912959
continue
913960
}
914961

962+
result2 := sliceOfString(collect(t, FieldsSeq([]byte(tt.s))))
963+
if !slices.Equal(result2, tt.a) {
964+
t.Errorf(`collect(FieldsSeq(%q)) = %v; want %v`, tt.s, result2, tt.a)
965+
}
966+
915967
if string(b) != tt.s {
916968
t.Errorf("slice changed to %s; want %s", string(b), tt.s)
917969
}
@@ -954,6 +1006,11 @@ func TestFieldsFunc(t *testing.T) {
9541006
t.Errorf("FieldsFunc(%q) = %v, want %v", tt.s, a, tt.a)
9551007
}
9561008

1009+
result2 := sliceOfString(collect(t, FieldsFuncSeq([]byte(tt.s), pred)))
1010+
if !slices.Equal(result2, tt.a) {
1011+
t.Errorf(`collect(FieldsFuncSeq(%q)) = %v; want %v`, tt.s, result2, tt.a)
1012+
}
1013+
9571014
if string(b) != tt.s {
9581015
t.Errorf("slice changed to %s; want %s", b, tt.s)
9591016
}

src/bytes/iter.go

+148
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
// Copyright 2024 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package bytes
6+
7+
import (
8+
"iter"
9+
"unicode"
10+
"unicode/utf8"
11+
)
12+
13+
// Lines returns an iterator over the newline-terminated lines in the byte slice s.
14+
// The lines yielded by the iterator include their terminating newlines.
15+
// If s is empty, the iterator yields no lines at all.
16+
// If s does not end in a newline, the final yielded line will not end in a newline.
17+
// It returns a single-use iterator.
18+
func Lines(s []byte) iter.Seq[[]byte] {
19+
return func(yield func([]byte) bool) {
20+
for len(s) > 0 {
21+
var line []byte
22+
if i := IndexByte(s, '\n'); i >= 0 {
23+
line, s = s[:i+1], s[i+1:]
24+
} else {
25+
line, s = s, nil
26+
}
27+
if !yield(line[:len(line):len(line)]) {
28+
return
29+
}
30+
}
31+
return
32+
}
33+
}
34+
35+
// explodeSeq returns an iterator over the runes in s.
36+
func explodeSeq(s []byte) iter.Seq[[]byte] {
37+
return func(yield func([]byte) bool) {
38+
for len(s) > 0 {
39+
_, size := utf8.DecodeRune(s)
40+
if !yield(s[:size:size]) {
41+
return
42+
}
43+
s = s[size:]
44+
}
45+
}
46+
}
47+
48+
// splitSeq is SplitSeq or SplitAfterSeq, configured by how many
49+
// bytes of sep to include in the results (none or all).
50+
func splitSeq(s, sep []byte, sepSave int) iter.Seq[[]byte] {
51+
if len(sep) == 0 {
52+
return explodeSeq(s)
53+
}
54+
return func(yield func([]byte) bool) {
55+
for {
56+
i := Index(s, sep)
57+
if i < 0 {
58+
break
59+
}
60+
frag := s[:i+sepSave]
61+
if !yield(frag[:len(frag):len(frag)]) {
62+
return
63+
}
64+
s = s[i+len(sep):]
65+
}
66+
yield(s[:len(s):len(s)])
67+
}
68+
}
69+
70+
// SplitSeq returns an iterator over all substrings of s separated by sep.
71+
// The iterator yields the same strings that would be returned by Split(s, sep),
72+
// but without constructing the slice.
73+
// It returns a single-use iterator.
74+
func SplitSeq(s, sep []byte) iter.Seq[[]byte] {
75+
return splitSeq(s, sep, 0)
76+
}
77+
78+
// SplitAfterSeq returns an iterator over substrings of s split after each instance of sep.
79+
// The iterator yields the same strings that would be returned by SplitAfter(s, sep),
80+
// but without constructing the slice.
81+
// It returns a single-use iterator.
82+
func SplitAfterSeq(s, sep []byte) iter.Seq[[]byte] {
83+
return splitSeq(s, sep, len(sep))
84+
}
85+
86+
// FieldsSeq returns an iterator over substrings of s split around runs of
87+
// whitespace characters, as defined by unicode.IsSpace.
88+
// The iterator yields the same strings that would be returned by Fields(s),
89+
// but without constructing the slice.
90+
func FieldsSeq(s []byte) iter.Seq[[]byte] {
91+
return func(yield func([]byte) bool) {
92+
start := -1
93+
for i := 0; i < len(s); {
94+
size := 1
95+
r := rune(s[i])
96+
isSpace := asciiSpace[s[i]] != 0
97+
if r >= utf8.RuneSelf {
98+
r, size = utf8.DecodeRune(s[i:])
99+
isSpace = unicode.IsSpace(r)
100+
}
101+
if isSpace {
102+
if start >= 0 {
103+
if !yield(s[start:i:i]) {
104+
return
105+
}
106+
start = -1
107+
}
108+
} else if start < 0 {
109+
start = i
110+
}
111+
i += size
112+
}
113+
if start >= 0 {
114+
yield(s[start:len(s):len(s)])
115+
}
116+
}
117+
}
118+
119+
// FieldsFuncSeq returns an iterator over substrings of s split around runs of
120+
// Unicode code points satisfying f(c).
121+
// The iterator yields the same strings that would be returned by FieldsFunc(s),
122+
// but without constructing the slice.
123+
func FieldsFuncSeq(s []byte, f func(rune) bool) iter.Seq[[]byte] {
124+
return func(yield func([]byte) bool) {
125+
start := -1
126+
for i := 0; i < len(s); {
127+
size := 1
128+
r := rune(s[i])
129+
if r >= utf8.RuneSelf {
130+
r, size = utf8.DecodeRune(s[i:])
131+
}
132+
if f(r) {
133+
if start >= 0 {
134+
if !yield(s[start:i:i]) {
135+
return
136+
}
137+
start = -1
138+
}
139+
} else if start < 0 {
140+
start = i
141+
}
142+
i += size
143+
}
144+
if start >= 0 {
145+
yield(s[start:len(s):len(s)])
146+
}
147+
}
148+
}

0 commit comments

Comments
 (0)