Skip to content

Commit 0b494d1

Browse files
committed
io: add benchmark for TFRecord reading
This reads a single event file from start to end, parsing the frame of each TFRecord, but not inspecting the payload at all. Unfortunately, the TFRecord reading is already ~3× slower than RustBoard’s entire pipeline, and ~13× slower than RustBoard’s TFRecord reading. :-( Discussion here is on a 248 MiB event file, the `egraph_edge_cgan_003` run from a user-supplied log directory: <tensorflow/tensorboard#766 (comment)> The effect of the buffer size is a bit strange. As expected, buffering definitely helps (~3× improvement with default buffer size), and the improvements taper off as the buffer size increases: 4 KiB and 1 MiB are about the same. But then in the 4 MiB to 8 MiB range we start to see sharp improvements: 1 MiB to 4 MiB is no change, but 4 MiB to 8 MiB is 25% faster. The improvements continue even up to 128 or 256 MiB on a file that’s 248 MiB long. Compare to RustBoard, which sees similar effects at low buffer sizes but no extra improvements for very large buffers. (This all running with hot filesystem cache.) Buffer size sweep: <https://gist.github.com/wchargin/b73b5af3ef36b88e4e1aacf9a2453ea6> CPU profiling shows that about 35% of time is spent in `make([]byte)` in `ExtendTo`, which seems unfortunate, since due to the actual access patterns we barely overallocate there (overallocating only 12 bytes per record), so it’s not obvious how to avoid that cost. Another 50% of total time is spent in `runtime.mallocgc`. And 20% of total time (not necessarily disjoint) is spent in the `result := TFRecord{...}` final allocation in `ReadRecord`, which is surprising to me since it just has two small fields (a slice header and a `uint32`) and they’ve already been computed above. (Inlining effects?) Checksum validation is fast when enabled; runtime increases by ~10%.
1 parent e582756 commit 0b494d1

File tree

1 file changed

+94
-0
lines changed

1 file changed

+94
-0
lines changed

io/cmd/tfrecord_bench/main.go

+94
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
package main
2+
3+
import (
4+
"bufio"
5+
"flag"
6+
"fmt"
7+
"io"
8+
"os"
9+
"runtime"
10+
"runtime/pprof"
11+
12+
tbio "github.com/wchargin/tensorboard-data-server/io"
13+
)
14+
15+
var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to file")
16+
var memprofile = flag.String("memprofile", "", "write memory profile to this file")
17+
18+
var bufsize = flag.Int("bufsize", 8192, "bufio.NewReaderSize capacity")
19+
20+
var checksum = flag.Bool("checksum", false, "validate TFRecord payloads against CRC-32")
21+
22+
func main() {
23+
flag.Parse()
24+
args := flag.Args()
25+
if len(args) != 1 {
26+
fmt.Fprintf(os.Stderr, "must specify event file path as only argument\n")
27+
os.Exit(1)
28+
}
29+
30+
// Profiling structure copied from "go doc runtime/pprof".
31+
if *cpuprofile != "" {
32+
f, err := os.Create(*cpuprofile)
33+
if err != nil {
34+
fmt.Fprintf(os.Stderr, "creating CPU profile: %v\n", err)
35+
os.Exit(1)
36+
}
37+
pprof.StartCPUProfile(f)
38+
defer pprof.StopCPUProfile()
39+
}
40+
41+
ReadAllRecords(args[0])
42+
43+
if *memprofile != "" {
44+
fmt.Println("creating memory profile...")
45+
f, err := os.Create(*memprofile)
46+
if err != nil {
47+
fmt.Fprintf(os.Stderr, "creating memory profile: %v\n", err)
48+
os.Exit(1)
49+
}
50+
defer f.Close()
51+
runtime.GC()
52+
if err := pprof.WriteHeapProfile(f); err != nil {
53+
fmt.Fprintf(os.Stderr, "writing memory profile: %v\n", err)
54+
}
55+
fmt.Println("done with memory profile")
56+
}
57+
}
58+
59+
func ReadAllRecords(fileName string) {
60+
rawFile, err := os.Open(fileName)
61+
if err != nil {
62+
panic(err)
63+
}
64+
f := bufio.NewReaderSize(rawFile, *bufsize)
65+
recordsRead := 0
66+
totalPayloadSize := 0
67+
for {
68+
payloadSize, more := ProcessOneRecord(f)
69+
recordsRead++
70+
totalPayloadSize += payloadSize
71+
if !more {
72+
break
73+
}
74+
}
75+
fmt.Printf("all done; read %v records (%v bytes payload)\n", recordsRead, totalPayloadSize)
76+
}
77+
78+
func ProcessOneRecord(r io.Reader) (payloadSize int, more bool) {
79+
var state *tbio.TFRecordState
80+
rec, err := tbio.ReadRecord(&state, r)
81+
if err == io.EOF {
82+
return 0, false
83+
}
84+
if err != nil {
85+
fmt.Fprintf(os.Stderr, "reading record: %v\n", err)
86+
os.Exit(1)
87+
}
88+
if *checksum {
89+
if err := rec.Checksum(); err != nil {
90+
fmt.Fprintf(os.Stderr, "checksum failure: %v\n", err)
91+
}
92+
}
93+
return len(rec.Data), true
94+
}

0 commit comments

Comments
 (0)