Skip to content

Commit a842bef

Browse files
committed
mzbuild: hoist debuginfo upload out of pre-image steps
We've had several bugs where building Materialize locally using mzbuild has failed because mzbuild erroneously tried to upload debuginfo to S3 and Polar Signals outside of CI [0][1][2]. The core of the issue is that we used a pre-image step to execute a *side effect*, i.e., uploading debuginfo to S3 or Polar Signals. mzbuild was designed to be an input-addressable build system: a (relatively) pure function from a set of input files to a packaged image. Uploading files to another system is not part of this core transformation from input files to built Docker image. So, this commit refactors mzbuild to treat uploading debuginfo to S3 or Polar Signals as the side effect that it is. The core mzbuild system no longer has any special pre-image actions for uploading debuginfo. Instead, mzbuild exposes a simple "post build" callback, which allows the caller to execute an arbitrary function whenever an image build is complete. CI, then, uses this new post-build callback to upload debuginfo to S3 or Polar Signals as appropriate. Local builds (e.g., via `mzcompose`) do not install a post-build callback, and so are guaranteed not to upload debuginfo when they shouldn't. [0]: #20031 [1]: #22523 [2]: #23261
1 parent 727fb97 commit a842bef

File tree

6 files changed

+163
-232
lines changed

6 files changed

+163
-232
lines changed

ci/test/build.py

Lines changed: 145 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,22 +9,35 @@
99
# the Business Source License, use of this software will be governed
1010
# by the Apache License, Version 2.0.
1111

12+
import os
13+
import subprocess
14+
import tempfile
1215
from pathlib import Path
1316

14-
from materialize import mzbuild, spawn, ui
17+
import boto3
18+
19+
from materialize import elf, mzbuild, spawn, ui
20+
from materialize.mzbuild import CargoBuild, ResolvedImage
1521
from materialize.xcompile import Arch
1622

23+
# The S3 bucket in which to store debuginfo.
24+
DEBUGINFO_S3_BUCKET = "materialize-debuginfo"
25+
26+
# The binaries for which debuginfo should be uploaded to S3 and Polar Signals.
27+
DEBUGINFO_BINS = ["environmentd", "clusterd"]
28+
1729

1830
def main() -> None:
    """CI entry point: build and publish any mzbuild images that are missing
    from Docker Hub, then upload debuginfo for freshly built binaries and
    annotate the Buildkite build with the resulting image tags."""
    coverage = ui.env_is_truthy("CI_COVERAGE_ENABLED")
    repo = mzbuild.Repository(Path("."), coverage=coverage)

    # Build and push any images that are not already available on Docker Hub,
    # so they are accessible to other build agents. Track which images were
    # actually built (vs. pulled) so that debuginfo is only uploaded for
    # binaries produced by this run.
    print("--- Acquiring mzbuild images")
    built_images: set[ResolvedImage] = set()
    deps = repo.resolve_dependencies(image for image in repo if image.publish)
    # `set.add` is already a one-argument callable, so it can serve directly
    # as the post-build callback without a lambda wrapper.
    deps.ensure(post_build=built_images.add)
    maybe_upload_debuginfo(repo, built_images)
    annotate_buildkite_with_tags(repo.rd.arch, deps)

3043

@@ -40,5 +53,133 @@ def annotate_buildkite_with_tags(arch: Arch, deps: mzbuild.DependencySet) -> Non
4053
)
4154

4255

56+
def maybe_upload_debuginfo(
    repo: mzbuild.Repository, built_images: set[ResolvedImage]
) -> None:
    """Uploads debuginfo to `DEBUGINFO_S3_BUCKET` and Polar Signals if any
    DEBUGINFO_BINS were built.

    Args:
        repo: the mzbuild repository that produced the build artifacts.
        built_images: the set of images that were freshly built (rather than
            pulled from a registry) during this run.

    Raises:
        subprocess.CalledProcessError: if extracting debuginfo or constructing
            the source tarball fails.
        KeyError: if `POLAR_SIGNALS_API_TOKEN` is not set in the environment.
    """

    # Find all binaries created by the `cargo-bin` pre-image.
    bins: set[str] = set()
    for image in built_images:
        for pre_image in image.image.pre_images:
            if isinstance(pre_image, CargoBuild):
                for bin in pre_image.bins:
                    if bin in DEBUGINFO_BINS:
                        bins.add(bin)
    if not bins:
        return

    print(f"Uploading debuginfo for {', '.join(bins)}...")

    s3 = boto3.client("s3")
    is_tag_build = ui.env_is_truthy("BUILDKITE_TAG")
    polar_signals_api_token = os.environ["POLAR_SIGNALS_API_TOKEN"]

    for bin in bins:
        cargo_profile = "release" if repo.rd.release_mode else "debug"
        bin_path = repo.rd.cargo_target_dir() / cargo_profile / bin
        dbg_path = bin_path.with_suffix(bin_path.suffix + ".debug")
        # Split the debug sections out of the binary into a sibling `.debug`
        # file via objcopy.
        spawn.runv(
            [
                *repo.rd.tool("objcopy"),
                bin_path,
                dbg_path,
                "--only-keep-debug",
            ],
        )

        # Upload binary and debuginfo to S3 bucket, regardless of whether this
        # is a tag build or not. S3 is cheap.
        with open(bin_path, "rb") as exe, open(dbg_path, "rb") as dbg:
            build_id = elf.get_build_id(exe)
            assert build_id.isalnum()
            assert len(build_id) > 0

            # The extracted debuginfo must carry the same GNU build ID as the
            # executable it came from, or the symbolization lookup would miss.
            dbg_build_id = elf.get_build_id(dbg)
            assert build_id == dbg_build_id

            for fileobj, name in [
                (exe, "executable"),
                (dbg, "debuginfo"),
            ]:
                key = f"buildid/{build_id}/{name}"
                print(f"Uploading {name} to s3://{DEBUGINFO_S3_BUCKET}/{key}...")
                # `elf.get_build_id` advanced the stream; rewind before upload.
                fileobj.seek(0)
                s3.upload_fileobj(
                    Fileobj=fileobj,
                    Bucket=DEBUGINFO_S3_BUCKET,
                    Key=key,
                    ExtraArgs={
                        "Tagging": f"ephemeral={'false' if is_tag_build else 'true'}",
                    },
                )

        # Upload debuginfo and sources to Polar Signals (our continuous
        # profiling provider), but only if this is a tag build. Polar Signals is
        # expensive, so we don't want to upload development or unstable builds
        # that won't ever be profiled by Polar Signals.
        if is_tag_build:
            print(f"Uploading debuginfo for {bin} to Polar Signals...")
            spawn.runv(
                [
                    "parca-debuginfo",
                    "upload",
                    "--store-address=grpc.polarsignals.com:443",
                    "--no-extract",
                    dbg_path,
                ],
                cwd=repo.rd.root,
                env=dict(
                    os.environ, PARCA_DEBUGINFO_BEARER_TOKEN=polar_signals_api_token
                ),
            )

            print(f"Constructing source tarball for {bin}...")
            with tempfile.NamedTemporaryFile() as tarball:
                p1 = subprocess.Popen(
                    ["llvm-dwarfdump", "--show-sources", bin_path],
                    stdout=subprocess.PIPE,
                )
                p2 = subprocess.Popen(
                    [
                        "tar",
                        "-cf",
                        tarball.name,
                        "--zstd",
                        "-T",
                        "-",
                        "--ignore-failed-read",
                    ],
                    stdin=p1.stdout,
                    stdout=subprocess.PIPE,
                )

                # This causes p1 to receive SIGPIPE if p2 exits early,
                # like in the shell.
                assert p1.stdout
                p1.stdout.close()

                # BUG FIX: the original checked `p.returncode` without ever
                # waiting on the processes. `Popen.returncode` stays `None`
                # (falsy) until the process is reaped, so failures were
                # silently ignored and the tarball could be uploaded before
                # `tar` finished writing it. `wait()` blocks until exit and
                # returns the exit code.
                for p in [p1, p2]:
                    if p.wait():
                        raise subprocess.CalledProcessError(p.returncode, p.args)

                print(f"Uploading source tarball for {bin} to Polar Signals...")
                spawn.runv(
                    [
                        "parca-debuginfo",
                        "upload",
                        "--store-address=grpc.polarsignals.com:443",
                        "--type=sources",
                        f"--build-id={build_id}",
                        tarball.name,
                    ],
                    cwd=repo.rd.root,
                    env=dict(
                        os.environ, PARCA_DEBUGINFO_BEARER_TOKEN=polar_signals_api_token
                    ),
                )
182+
183+
43184
if __name__ == "__main__":
44185
main()

doc/developer/mzbuild.md

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -418,10 +418,7 @@ publish: true
418418
`.cargo/config` files.
419419

420420
Cargo is invoked with the `--release` flag unless the `--dev` flag is
421-
specified. The binary's debuginfo can be extracted and copied to a file in
422-
the same directory named `BIN.debug` by specifying `split-debuginfo: true`.
423-
The binary itself will be stripped of debug information unless `strip:
424-
false` is requested.
421+
specified.
425422

426423
In rare cases, it may be necessary to extract files from the build
427424
directory of a dependency. The `extract` key specifies a mapping from a

misc/images/balancerd/mzbuild.yml

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,4 @@ description: Ingress balancer.
1212
pre-image:
1313
- type: cargo-build
1414
bin: [mz-balancerd]
15-
strip: true
1615
split_debuginfo: true
17-
- type: s3-upload-debuginfo
18-
bin: mz-balancerd
19-
bucket: materialize-debuginfo
20-
- type: ps-upload-sources
21-
bin: mz-balancerd.debug

0 commit comments

Comments
 (0)