Skip to content

Commit 68621e1

Browse files
committed
bytes, strings: add Lines, SplitSeq, SplitAfterSeq, FieldsSeq, FieldsFuncSeq
Fixes golang#61901.
1 parent 1f0c044 commit 68621e1

File tree

7 files changed

+431
-0
lines changed

7 files changed

+431
-0
lines changed

api/next/61901.txt

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
pkg bytes, func FieldsFuncSeq([]uint8, func(int32) bool) iter.Seq[[]uint8] #61901
2+
pkg bytes, func FieldsSeq([]uint8) iter.Seq[[]uint8] #61901
3+
pkg bytes, func Lines([]uint8) iter.Seq[[]uint8] #61901
4+
pkg bytes, func SplitAfterSeq([]uint8, []uint8) iter.Seq[[]uint8] #61901
5+
pkg bytes, func SplitSeq([]uint8, []uint8) iter.Seq[[]uint8] #61901
6+
pkg strings, func FieldsFuncSeq(string, func(int32) bool) iter.Seq[string] #61901
7+
pkg strings, func FieldsSeq(string) iter.Seq[string] #61901
8+
pkg strings, func Lines(string) iter.Seq[string] #61901
9+
pkg strings, func SplitAfterSeq(string, string) iter.Seq[string] #61901
10+
pkg strings, func SplitSeq(string, string) iter.Seq[string] #61901
+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
The [bytes] package adds several functions that work with iterators:
2+
- [Lines] returns an iterator over the
3+
newline-terminated lines in the byte slice s.
4+
- [SplitSeq] returns an iterator over
5+
all subslices of s separated by sep.
6+
- [SplitAfterSeq] returns an iterator
7+
over subslices of s split after each instance of sep.
8+
- [FieldsSeq] returns an iterator over
9+
subslices of s split around runs of whitespace characters,
10+
as defined by unicode.IsSpace.
11+
- [FieldsFuncSeq] returns an iterator
12+
over subslices of s split around runs of Unicode code points satisfying f(c).
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
The [strings] package adds several functions that work with iterators:
2+
- [Lines] returns an iterator over
3+
the newline-terminated lines in the string s.
4+
- [SplitSeq] returns an iterator over
5+
all substrings of s separated by sep.
6+
- [SplitAfterSeq] returns an iterator
7+
over substrings of s split after each instance of sep.
8+
- [FieldsSeq] returns an iterator over
9+
substrings of s split around runs of whitespace characters,
10+
as defined by unicode.IsSpace.
11+
- [FieldsFuncSeq] returns an iterator
12+
over substrings of s split around runs of Unicode code points satisfying f(c).

src/bytes/bytes_test.go

+51
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
. "bytes"
99
"fmt"
1010
"internal/testenv"
11+
"iter"
1112
"math"
1213
"math/rand"
1314
"slices"
@@ -26,6 +27,31 @@ func sliceOfString(s [][]byte) []string {
2627
return result
2728
}
2829

30+
func collect(t *testing.T, seq iter.Seq[[]byte]) [][]byte {
31+
out := slices.Collect(seq)
32+
out1 := slices.Collect(seq)
33+
if !slices.Equal(sliceOfString(out), sliceOfString(out1)) {
34+
t.Fatalf("inconsistent seq:\n%s\n%s", out, out1)
35+
}
36+
return out
37+
}
38+
39+
var LinesTest = []string{
40+
"abc\nabc\n",
41+
"abc\r\nabc",
42+
"abc\r\n",
43+
"abc\n",
44+
}
45+
46+
func TestLines(t *testing.T) {
47+
for _, s := range LinesTest {
48+
result := Join(slices.Collect(Lines([]byte(s))), []byte(""))
49+
if string(result) != s {
50+
t.Errorf(`Join(collect(Lines(%q)), "") = %q`, s, result)
51+
}
52+
}
53+
}
54+
2955
// For ease of reading, the test cases use strings that are converted to byte
3056
// slices before invoking the functions.
3157

@@ -800,6 +826,14 @@ func TestSplit(t *testing.T) {
800826
t.Errorf(`Split(%q, %q, %d) = %v; want %v`, tt.s, tt.sep, tt.n, result, tt.a)
801827
continue
802828
}
829+
830+
if tt.n < 0 {
831+
b := sliceOfString(slices.Collect(SplitSeq([]byte(tt.s), []byte(tt.sep))))
832+
if !slices.Equal(b, tt.a) {
833+
t.Errorf(`collect(SplitSeq(%q, %q)) = %v; want %v`, tt.s, tt.sep, b, tt.a)
834+
}
835+
}
836+
803837
if tt.n == 0 || len(a) == 0 {
804838
continue
805839
}
@@ -859,6 +893,13 @@ func TestSplitAfter(t *testing.T) {
859893
continue
860894
}
861895

896+
if tt.n < 0 {
897+
b := sliceOfString(slices.Collect(SplitAfterSeq([]byte(tt.s), []byte(tt.sep))))
898+
if !slices.Equal(b, tt.a) {
899+
t.Errorf(`collect(SplitAfterSeq(%q, %q)) = %v; want %v`, tt.s, tt.sep, b, tt.a)
900+
}
901+
}
902+
862903
if want := tt.a[len(tt.a)-1] + "z"; string(x) != want {
863904
t.Errorf("last appended result was %s; want %s", x, want)
864905
}
@@ -912,6 +953,11 @@ func TestFields(t *testing.T) {
912953
continue
913954
}
914955

956+
result2 := sliceOfString(collect(t, FieldsSeq([]byte(tt.s))))
957+
if !slices.Equal(result2, tt.a) {
958+
t.Errorf(`collect(FieldsSeq(%q)) = %v; want %v`, tt.s, result2, tt.a)
959+
}
960+
915961
if string(b) != tt.s {
916962
t.Errorf("slice changed to %s; want %s", string(b), tt.s)
917963
}
@@ -954,6 +1000,11 @@ func TestFieldsFunc(t *testing.T) {
9541000
t.Errorf("FieldsFunc(%q) = %v, want %v", tt.s, a, tt.a)
9551001
}
9561002

1003+
result2 := sliceOfString(collect(t, FieldsFuncSeq([]byte(tt.s), pred)))
1004+
if !slices.Equal(result2, tt.a) {
1005+
t.Errorf(`collect(FieldsFuncSeq(%q)) = %v; want %v`, tt.s, result2, tt.a)
1006+
}
1007+
9571008
if string(b) != tt.s {
9581009
t.Errorf("slice changed to %s; want %s", b, tt.s)
9591010
}

src/bytes/iter.go

+150
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
// Copyright 2024 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package bytes
6+
7+
import (
8+
"iter"
9+
"unicode"
10+
"unicode/utf8"
11+
)
12+
13+
// Lines returns an iterator over the newline-terminated lines in the byte slice s.
14+
// The lines yielded by the iterator include their terminating newlines.
15+
// If s is empty, the iterator yields no lines at all.
16+
// If s does not end in a newline, the final yielded line will not end in a newline.
17+
// It returns a single-use iterator.
18+
func Lines(s []byte) iter.Seq[[]byte] {
19+
return func(yield func([]byte) bool) {
20+
for len(s) > 0 {
21+
var line []byte
22+
if i := IndexByte(s, '\n'); i >= 0 {
23+
line, s = s[:i+1], s[i+1:]
24+
} else {
25+
line, s = s, nil
26+
}
27+
if !yield(line) {
28+
return
29+
}
30+
}
31+
return
32+
}
33+
}
34+
35+
// explodeSeq returns an iterator over the runes in s.
36+
func explodeSeq(s []byte) iter.Seq[[]byte] {
37+
return func(yield func([]byte) bool) {
38+
for len(s) > 0 {
39+
_, size := utf8.DecodeRune(s)
40+
if !yield(s[:size]) {
41+
return
42+
}
43+
s = s[size:]
44+
}
45+
}
46+
}
47+
48+
// splitSeq is SplitSeq or SplitAfterSeq, configured by how many
49+
// bytes of sep to include in the results (none or all).
50+
func splitSeq(s, sep []byte, sepSave int) iter.Seq[[]byte] {
51+
if len(sep) == 0 {
52+
return explodeSeq(s)
53+
}
54+
return func(yield func([]byte) bool) {
55+
for {
56+
i := Index(s, sep)
57+
if i < 0 {
58+
break
59+
}
60+
frag := s[:i+sepSave]
61+
if !yield(frag) {
62+
return
63+
}
64+
s = s[i+len(sep):]
65+
}
66+
yield(s)
67+
}
68+
}
69+
70+
// SplitSeq returns an iterator over all substrings of s separated by sep.
71+
// The iterator yields the same strings that would be returned by Split(s, sep),
72+
// but without constructing the slice.
73+
// It returns a single-use iterator.
74+
func SplitSeq(s, sep []byte) iter.Seq[[]byte] {
75+
return splitSeq(s, sep, 0)
76+
}
77+
78+
// SplitAfterSeq returns an iterator over subslices of s split after each instance of sep.
79+
// The iterator yields subslices with the same contents as those returned by SplitAfter(s, sep),
80+
// but without constructing the slice.
81+
// It returns a single-use iterator.
82+
func SplitAfterSeq(s, sep []byte) iter.Seq[[]byte] {
83+
return splitSeq(s, sep, len(sep))
84+
}
85+
86+
// FieldsSeq returns an iterator over substrings of s split around runs of
87+
// whitespace characters, as defined by unicode.IsSpace.
88+
// The iterator yields the same strings that would be returned by Fields(s),
89+
// but without constructing the slice.
90+
func FieldsSeq(s []byte) iter.Seq[[]byte] {
91+
return func(yield func([]byte) bool) {
92+
s := s
93+
start := -1
94+
for i := 0; i < len(s); {
95+
size := 1
96+
r := rune(s[i])
97+
isSpace := asciiSpace[s[i]] != 0
98+
if r >= utf8.RuneSelf {
99+
r, size = utf8.DecodeRune(s[i:])
100+
isSpace = unicode.IsSpace(r)
101+
}
102+
if isSpace {
103+
if start >= 0 {
104+
if !yield(s[start:i]) {
105+
return
106+
}
107+
start = -1
108+
}
109+
} else if start < 0 {
110+
start = i
111+
}
112+
i += size
113+
}
114+
if start >= 0 {
115+
yield(s[start:])
116+
}
117+
}
118+
}
119+
120+
// FieldsFuncSeq returns an iterator over substrings of s split around runs of
121+
// Unicode code points satisfying f(c).
122+
// The iterator yields the same strings that would be returned by FieldsFunc(s),
123+
// but without constructing the slice.
124+
func FieldsFuncSeq(s []byte, f func(rune) bool) iter.Seq[[]byte] {
125+
return func(yield func([]byte) bool) {
126+
s := s
127+
start := -1
128+
for i := 0; i < len(s); {
129+
size := 1
130+
r := rune(s[i])
131+
if r >= utf8.RuneSelf {
132+
r, size = utf8.DecodeRune(s[i:])
133+
}
134+
if f(r) {
135+
if start >= 0 {
136+
if !yield(s[start:i]) {
137+
return
138+
}
139+
start = -1
140+
}
141+
} else if start < 0 {
142+
start = i
143+
}
144+
i += size
145+
}
146+
if start >= 0 {
147+
yield(s[start:])
148+
}
149+
}
150+
}

0 commit comments

Comments
 (0)