@@ -116,7 +116,7 @@ train-and-report:
116
116
```
117
117
118
118
</tab>
119
- <tab title="Bitbucket">
119
+ <tab title="Bitbucket (no GPU)">
120
120
121
121
```yaml
122
122
pipelines:
@@ -134,7 +134,6 @@ pipelines:
134
134
- step:
135
135
runs-on: [self.hosted, cml.runner]
136
136
image: iterativeai/cml:0-dvc2-base1
137
- # GPU not yet supported, see https://github.com/iterative/cml/issues/1015
138
137
script:
139
138
- pip install -r requirements.txt
140
139
- python train.py # generate plot.png
@@ -144,6 +143,56 @@ pipelines:
144
143
- cml comment create report.md
145
144
```
146
145
146
+ </tab>
147
+ <tab title="Bitbucket">
148
+
149
+ Bitbucket does not support GPUs natively
150
+ ([cml#1015](https://github.com/iterative/cml/issues/1015),
151
+ [BCLOUD-21459](https://jira.atlassian.com/browse/BCLOUD-21459)). A work-around
152
+ is to directly use
153
+ [TPI](https://github.com/iterative/terraform-provider-iterative) (the library
154
+ which CML `runner` uses internally). TPI includes a CLI-friendly helper called
155
+ LEO (launch, execute, orchestrate), used below:
156
+
157
+ ```yaml
158
+ image: iterativeai/cml:0-dvc2-base1
159
+ pipelines:
160
+ default:
161
+ - step:
162
+ name: Launch Runner and Train
163
+ script:
164
+ # Create training script
165
+ - |
166
+ cat <<EOF > leo-script.sh
167
+ #!/bin/bash
168
+ apt-get update -q && apt-get install -yq python3.9
169
+ pip3 install -r requirements.txt
170
+ python train.py # generate plot.png
171
+ EOF
172
+ # Launch runner
173
+ - |
174
+ LEO_OPTIONS="--cloud=aws --region=us-west"
175
+ leo_id=$(leo create $LEO_OPTIONS \
176
+ --image=nvidia \
177
+ --machine=p2.xlarge \
178
+ --disk-size=64 \
179
+ --workdir=. \
180
+ --output=. \
181
+ --environment AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" \
182
+ --environment AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" \
183
+ --script="$(cat ./leo-script.sh)"
184
+ )
185
+ # Wait for cloud training to finish
186
+ leo read $LEO_OPTIONS --follow "$leo_id"
187
+ sleep 45 # TODO: explain
188
+ # Download cloud training results & clean up cloud resources
189
+ leo delete $LEO_OPTIONS --workdir=. --output=. "$leo_id"
190
+ # Create CML report
191
+ - cat metrics.txt >> report.md
192
+ - echo '' >> report.md
193
+ - cml comment create report.md
194
+ ```
195
+
147
196
</tab>
148
197
</toggle>
149
198
0 commit comments