Skip to content

Commit 043673b

Browse files
Add rerun script (#264)
* Add script for automatically re-running notebooks Co-authored-by: Thomas Wiecki <[email protected]> * Re-run lasso_block_update.ipynb notebook Co-authored-by: Thomas Wiecki <[email protected]>
1 parent eaf575a commit 043673b

File tree

2 files changed

+259
-59
lines changed

2 files changed

+259
-59
lines changed

examples/pymc3_howto/lasso_block_update.ipynb

+88-59
Large diffs are not rendered by default.

scripts/rerun.py

+171
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
"""
2+
This script/module may be used to re-run, commit & push notebooks
3+
from the CLI or from another Python script (via `import rerun`).
4+
5+
Run `python rerun.py -h` to show the CLI help.
6+
7+
The example below does the following:
8+
1. Re-runs the BEST notebook
9+
2. Commits changes to a branch "rerun-best"
10+
3. Pushes that branch to a remote named "mine"
11+
Assuming you did something like: `git remote add mine https://github.com/yourgithubusername/pymc-examples`
12+
13+
```
14+
python scripts/rerun.py --fp_notebook=examples/case_studies/BEST.ipynb --commit_to=rerun-best --push_to=mine
15+
```
16+
"""
17+
import argparse
18+
import logging
19+
import pathlib
20+
import subprocess
21+
22+
23+
# Logging is configured at import time so the progress messages emitted by the
# functions below are shown at INFO level.
logging.basicConfig(level=logging.INFO)
_log = logging.getLogger(__file__)
# Two directory levels above this file (the parent of the directory containing it).
DP_REPO = pathlib.Path(__file__).absolute().parent.parent

# Plain-text substitutions applied to a notebook file, one after another, in
# insertion order. The order matters:
#   * the specific "…from_pymc3" rules must run before the generic "pymc3" rule;
#   * "tt." -> "at." also rewrites "studentt." into "studenat.", which the
#     later "studenat" -> "studentt" rule repairs;
#   * the "return_inferencedata=True" variants go from most to least specific
#     so that the surrounding comma/space is removed together with the keyword.
REPLACEMENTS = {
    "az.from_pymc3": "pm.to_inference_data",
    "arviz.from_pymc3": "pm.to_inference_data",
    "pymc3": "pymc",
    "PyMC3": "PyMC",
    "from theano import tensor as tt": "import aesara.tensor as at",
    "import theano.tensor as tt": "import aesara.tensor as at",
    "tt.": "at.",
    "aet": "at",
    "studenat": "studentt",
    "theano": "aesara",
    "Theano": "Aesara",
    "pm.sample()": "pm.sample(return_inferencedata=False)",
    ", return_inferencedata=True": "",
    "return_inferencedata=True, ": "",
    "return_inferencedata=True,": "",
    "return_inferencedata=True": "",
}
47+
48+
49+
def apply_replacements(fp: pathlib.Path) -> bool:
    """Rewrite the file at ``fp`` in place, applying every ``REPLACEMENTS`` rule.

    The file is read as UTF-8 text, each (pattern, substitute) pair is applied
    with ``str.replace`` in the dict's iteration order, and the result is
    written back to the same path.

    Returns ``True`` on success. Any exception is logged and reported as ``False``.
    """
    try:
        _log.info("⏳ Running API migration")
        with open(fp, "r", encoding="utf-8") as source:
            content = source.read()

        for old, new in REPLACEMENTS.items():
            content = content.replace(old, new)

        with open(fp, "w", encoding="utf-8") as target:
            target.write(content)
    except Exception as ex:
        _log.exception("❌ Failed to apply replacements.", exc_info=ex)
        return False
    else:
        return True
64+
65+
66+
def run_precommit(fp: pathlib.Path, attempts: int = 2) -> bool:
    """Run ``pre-commit run --files <fp>``, retrying on failure.

    A failed run is retried until ``attempts`` runs have been made; only the
    failure of the last attempt is treated as an error. (Presumably the retry
    exists because hooks that auto-fix a file fail the run in which they
    modify it - confirm against the repository's hook configuration.)

    Parameters
    ----------
    fp
        File to pass to pre-commit.
    attempts
        Maximum number of pre-commit runs. With ``attempts <= 0`` nothing is
        run and ``True`` is returned.

    Returns
    -------
    bool
        ``True`` if a run succeeded, ``False`` if the last attempt failed or
        any other exception occurred (the exception is logged).
    """
    try:
        for a in range(attempts):
            _log.info("⏳ Running pre-commit attempt %i on %s", a, fp)
            try:
                subprocess.check_call(["pre-commit", "run", "--files", str(fp)])
            except subprocess.CalledProcessError:
                # Swallow intermediate failures; re-raise only on the final attempt.
                if a == attempts - 1:
                    raise
            else:
                # A passing run ends the retry loop; re-running would only repeat work.
                return True
        # Only reached when no attempt was made (attempts <= 0).
        return True
    except Exception as ex:
        _log.exception("❌ Failed to run pre-commit.", exc_info=ex)
        return False
79+
80+
81+
def execute_notebook(fp: pathlib.Path) -> bool:
    """Execute the notebook at ``fp`` with ``jupyter nbconvert``, overwriting it.

    nbconvert is invoked with ``--execute --inplace`` using the ``python3``
    kernel and an ``ExecutePreprocessor.timeout`` of 14000.

    Returns ``True`` if nbconvert exited successfully. A non-zero exit status
    is logged and reported as ``False``.
    """
    try:
        _log.info("⏳ Executing notebook %s", fp)
        subprocess.check_call(
            [
                "jupyter",
                "nbconvert",
                "--ExecutePreprocessor.kernel_name=python3",
                "--ExecutePreprocessor.timeout=14000",
                "--execute",
                "--inplace",
                str(fp),
            ]
        )
        _log.info("✔ Notebook executed successfully.")
        return True
    except subprocess.CalledProcessError as ex:
        # The message names this step, so a failed execution is not mistaken
        # for a failed git commit in the log.
        _log.exception("❌ Failed to execute notebook.", exc_info=ex)
        return False
100+
101+
102+
def commit(fp: pathlib.Path, branch: str) -> bool:
    """Switch to ``branch`` (creating it if needed), then stage and commit ``fp``.

    Parameters
    ----------
    fp
        File whose changes are staged and committed.
    branch
        Name of the local branch to commit to.

    Returns
    -------
    bool
        ``True`` if the checkout, stage and commit all succeeded. A failing git
        command is logged and reported as ``False``.
    """
    try:
        _log.info("Switching to branch %s", branch)
        # Look the branch up by its exact ref name. A substring test on the
        # `git branch` listing would treat "rerun" as existing whenever
        # "rerun-best" does, and decoding that listing as ASCII fails on
        # non-ASCII branch names. `show-ref --verify --quiet` exits with 0
        # only if the ref exists.
        branch_exists = (
            subprocess.call(["git", "show-ref", "--verify", "--quiet", f"refs/heads/{branch}"])
            == 0
        )
        if not branch_exists:
            subprocess.check_call(["git", "checkout", "-b", branch])
        else:
            subprocess.check_call(["git", "checkout", branch])

        _log.info("⏳ Committing changes in %s to %s", fp, branch)
        subprocess.check_call(["git", "stage", str(fp)])
        subprocess.check_call(["git", "commit", "-m", f"Re-run {fp.name} notebook"])
        _log.info("✔ Changes in %s were commited to branch %s.", fp, branch)
        return True
    except subprocess.CalledProcessError as ex:
        _log.exception("❌ Failed to commit.", exc_info=ex)
        return False
118+
119+
120+
def push(branch, remote: str) -> bool:
    """Push the local ``branch`` to the same-named branch on ``remote``.

    Runs ``git push -u`` with the refspec ``<branch>:<branch>``.

    Returns ``True`` if git exited successfully. A non-zero exit status is
    logged and reported as ``False``.
    """
    try:
        _log.info("⏳ Pushing %s to %s", branch, remote)
        git_push = ["git", "push", "-u", remote, f"{branch}:{branch}"]
        subprocess.check_call(git_push)
        _log.info("✔ Pushed %s to %s/%s.", branch, remote, branch)
    except subprocess.CalledProcessError as ex:
        _log.exception("❌ Failed push.", exc_info=ex)
        return False
    else:
        return True
129+
130+
131+
def get_args():
    """Parse the command-line options of this script.

    Options:
      --fp_notebook  (required) path to the notebook to process
      --commit_to    (optional) git branch to commit to
      --push_to      (optional) git remote to push to

    Returns the ``argparse.Namespace`` produced by ``parse_args()``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--fp_notebook",
        required=True,
        type=str,
        help=f"Absolute or relative path to a Jupyter notebook in {DP_REPO}.",
    )
    # The two optional flags differ only in name and help text.
    optional_flags = (
        ("--commit_to", "Name of a git branch to commit to on success."),
        ("--push_to", "Name of a git remote to push to on success."),
    )
    for flag, description in optional_flags:
        cli.add_argument(flag, type=str, help=description, required=False)
    return cli.parse_args()
149+
150+
151+
if __name__ == "__main__":
    # CLI entry point: migrate -> pre-commit -> execute -> (commit -> (push)).
    args = get_args()

    fp = pathlib.Path(args.fp_notebook)
    if not fp.exists():
        raise FileNotFoundError(f"Notebook file {fp} does not exist.")

    # Pushing only happens after a commit, so tell the user when the push
    # option cannot take effect instead of silently ignoring it.
    if args.push_to and not args.commit_to:
        _log.warning("--push_to is ignored because --commit_to was not given.")

    # Each step runs only if all previous steps succeeded (short-circuit `and`).
    success = apply_replacements(fp) and run_precommit(fp) and execute_notebook(fp)
    if args.commit_to:
        # Executing the notebook rewrote it, so pre-commit runs once more before committing.
        success = success and run_precommit(fp)
        success = success and commit(fp, args.commit_to)
        if success and args.push_to:
            success = push(args.commit_to, args.push_to)

    if success:
        _log.info("✔ All steps succeeded.")
    else:
        _log.error("❌ Manual investigation needed.")
        # Report the failure through the exit status so shells and CI can detect it.
        raise SystemExit(1)

0 commit comments

Comments
 (0)