bitbucket: add GPU examples

casperdcl · casperdcl · commit 620eceb9ad0f · 2022-12-01T14:40:07.000Z
- fixes iterative/cml#1015 - fixes #349
diff --git a/content/docs/ref/runner.md b/content/docs/ref/runner.md
@@ -78,6 +78,15 @@ Any [generic option](/doc/ref) in addition to:
   need to write your code to save intermediate results to take advantage of
   this).
 
+### Bitbucket
+
+- **GPU support**.
+
+  See
+  [the guide on self-hosted Bitbucket runners](/doc/self-hosted-runners?tab=Bitbucket)
+  to work around
+  [Bitbucket's lack of native GPU support](https://jira.atlassian.com/browse/BCLOUD-21459).
+
 ## Examples
 
 ### Using `--cloud-permission-set`
diff --git a/content/docs/self-hosted-runners.md b/content/docs/self-hosted-runners.md
@@ -116,7 +116,7 @@ train-and-report:
 ```
 
 </tab>
-<tab title="Bitbucket">
+<tab title="Bitbucket (no GPU)">
 
 ```yaml
 pipelines:
@@ -134,7 +134,6 @@ pipelines:
     - step:
         runs-on: [self.hosted, cml.runner]
         image: iterativeai/cml:0-dvc2-base1
-        # GPU not yet supported, see https://github.com/iterative/cml/issues/1015
         script:
           - pip install -r requirements.txt
           - python train.py # generate plot.png
@@ -144,6 +143,56 @@ pipelines:
           - cml comment create report.md
 ```
 
+</tab>
+<tab title="Bitbucket">
+
+Bitbucket does not support GPUs natively
+([cml#1015](https://github.com/iterative/cml/issues/1015),
+[BCLOUD-21459](https://jira.atlassian.com/browse/BCLOUD-21459)). A work-around
+is to directly use
+[TPI](https://github.com/iterative/terraform-provider-iterative) (the library
+which CML `runner` uses internally). TPI includes a CLI-friendly helper called
+LEO (launch, execute, orchestrate), used below:
+
+```yaml
+image: iterativeai/cml:0-dvc2-base1
+pipelines:
+  default:
+    - step:
+        name: Launch Runner and Train
+        script:
+          # Create training script
+          - |
+            cat <<EOF > leo-script.sh
+            #!/bin/bash
+            apt-get update -q && apt-get install -yq python3.9
+            pip3 install -r requirements.txt
+            python train.py # generate plot.png
+            EOF
+          # Launch runner
+          - |
+            LEO_OPTIONS="--cloud=aws --region=us-west"
+            leo_id=$(leo create $LEO_OPTIONS \
+              --image=nvidia \
+              --machine=p2.xlarge \
+              --disk-size=64 \
+              --workdir=. \
+              --output=. \
+              --environment AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" \
+              --environment AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" \
+              --script="$(cat ./leo-script.sh)"
+            )
+            # Wait for cloud training to finish
+            leo read $LEO_OPTIONS --follow "$leo_id"
+            sleep 45 # TODO: explain
+            # Download cloud training results & clean up cloud resources
+            leo delete $LEO_OPTIONS --workdir=. --output=. "$leo_id"
+          # Create CML report
+          - cat metrics.txt >> report.md
+          - echo '![](./plot.png "Confusion Matrix")' >> report.md
+          - cml comment create report.md
+```
+
 </tab>
 </toggle>
 
diff --git a/src/components/pages/Home/UseCasesSection/index.tsx b/src/components/pages/Home/UseCasesSection/index.tsx
@@ -673,6 +673,8 @@ const UseCasesSection: React.ForwardRefRenderFunction<HTMLElement> = () => (
                 bitbucket={(
                   <Collapser>
                     <Code filename="bitbucket-pipelines.yml" repo="https://bitbucket.org/iterative-ai/cml-cloud-case">
+                      <div><span># Use LEO instead of CML to force GPU support on Bitbucket</span></div>
+                      <div><span># (<a href="/doc/ref/runner#bitbucket">https://cml.dev/doc/ref/runner#bitbucket</a>)</span></div>
                       <div><span>image: iterativeai/cml:0-dvc2-base1</span></div>
                       <div><span>pipelines:</span></div>
                       <div>  <span>default:</span></div>
@@ -705,7 +707,7 @@ const UseCasesSection: React.ForwardRefRenderFunction<HTMLElement> = () => (
                       <div>              <span>--script=&quot;$(cat ./leo-script.sh)&quot;</span></div>
                       <div>            <span>)</span></div>
                       <div>            <span>leo read $LEO_OPTIONS --follow &quot;$leo_id&quot;</span></div>
-                      <div>            <span>sleep 45 # TODO: replace this hack with a proper wait loop</span></div>
+                      <div>            <span>sleep 45 # TODO: explain</span></div>
                       <div>            <span>leo delete $LEO_OPTIONS --workdir=&quot;.&quot; --output=&quot;.&quot; \</span></div>
                       <div>              <span>&quot;$leo_id&quot;</span></div>
                       </Tooltip>