1
- # Python CircleCI 2.0 configuration file
2
- #
3
- # Check https://circleci.com/docs/2.0/language-python/ for more details
4
- #
5
- version : 2.0
1
+ # Python CircleCI 2.1 configuration file.
2
+ version : 2.1
3
+ orbs :
4
+ gcp-gke :
circleci/[email protected]
5
+
6
+ codecov :
codecov/[email protected]
6
7
7
8
references :
8
9
9
- install_deps : &install_deps
10
+ make_docs : &make_docs
10
11
run :
11
- name : Install Dependences
12
+ name : Make Documentation
12
13
command : |
13
- sudo apt-get update && sudo apt-get install -y cmake
14
- pip install -r requirements/base.txt -q
15
- pip install -r requirements/test.txt -q
14
+ # First run the same pipeline as Read-The-Docs
15
+ # apt-get update && apt-get install -y cmake
16
+ # using: https://hub.docker.com/r/readthedocs/build
17
+ # we need to use py3.7 or higher because of an issue with metaclass inheritance
18
+ pyenv global 3.7.3
19
+ python --version
20
+ pip install -r requirements/docs.txt
21
+ cd docs; make clean; make html --debug --jobs 2 SPHINXOPTS="-W"
16
22
17
- tests : &tests
23
+ checkout_ml_testing : &checkout_ml_testing
18
24
run :
19
- name : Testing
25
+ name : Checkout ml-testing-accelerators
20
26
command : |
21
- python --version ; pip --version ; pip list
22
- python -m pytest pytorch_lightning -v --junitxml=test-reports/pytest_junit.xml --ignore=pytorch_lightning/loggers/comet.py --ignore=pytorch_lightning/loggers/mlflow.py --ignore=pytorch_lightning/loggers/neptune.py --ignore=pytorch_lightning/loggers/test_tube.py --ignore=pytorch_lightning/loggers/wandb.py --ignore=pytorch_lightning/metrics/sklearns.py
23
- no_output_timeout : 15m
27
+ git clone https://github.com/GoogleCloudPlatform/ml-testing-accelerators.git
28
+ cd ml-testing-accelerators
29
+ git fetch origin 5e88ac24f631c27045e62f0e8d5dfcf34e425e25:stable
30
+ git checkout stable
31
+ cd ..
24
32
25
- make_docs : &make_docs
33
+ build_push_docker : &build_push_docker
34
+ run :
35
+ name : Build and push Docker image
36
+ command : |
37
+ gcloud --quiet auth configure-docker
38
+ cd dockers/tpu-tests
39
+ # TODO: How to find the GITHUB_REF in CircleCI?
40
+ # $CI_PULL_REQUEST seems to be of form: https://github.com/org/repo-name/pull/11.
41
+ # Grab the last bit, e.g. pull/11, convert to pull/11/head, and use it
42
+ # for the GITHUB_REF so Docker can pull the latest pending code in PR.
43
+ git_ref=$(echo "$CI_PULL_REQUEST" | sed "s/.*pytorch-lightning\///")/head
44
+ docker build --tag "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID" -f Dockerfile --build-arg "TEST_IMAGE=1" --build-arg "GITHUB_REF=$git_ref" .
45
+ #docker build --tag "$IMAGE:$GITHUB_RUN_ID" -f Dockerfile --build-arg "GITHUB_REF=$GITHUB_REF" --build-arg "TEST_IMAGE=1" .
46
+ docker push "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID"
47
+
48
+ deploy_cluster : &deploy_cluster
26
49
run :
27
- name : Make Documentation
50
+ name : Deploy the job on the kubernetes cluster
28
51
command : |
29
- # First run the same pipeline as Read-The-Docs
30
- # apt-get update && apt-get install -y cmake
31
- # using: https://hub.docker.com/r/readthedocs/build
32
- # we need to use py3.7 ot higher becase of an issue with metaclass inheritence
33
- pyenv global 3.7.3
34
- python --version
35
- pip install -r requirements/docs.txt
36
- cd docs; make clean; make html --debug --jobs 2 SPHINXOPTS="-W"
52
+ go get github.com/google/go-jsonnet/cmd/jsonnet
53
+ export PATH=$PATH:$HOME/go/bin
54
+ job_name=$(jsonnet -J ml-testing-accelerators/ dockers/tpu-tests/tpu_test_cases.jsonnet --ext-str image=$GCR_IMAGE_PATH --ext-str image-tag=$CIRCLE_WORKFLOW_JOB_ID | kubectl create -f -)
55
+ job_name=${job_name#job.batch/}
56
+ job_name=${job_name% created}
57
+ echo "Waiting on kubernetes job: $job_name"
58
+ i=0 && \
59
+ # MAX_CHECKS checks spaced 30s apart (60 checks = 1800s total).
60
+ status_code=2 && \
61
+ # Check on the job periodically. Set the status code depending on what
62
+ # happened to the job in Kubernetes. If we try MAX_CHECKS times and
63
+ # still the job hasn't finished, give up and return the starting
64
+ # non-zero status code.
65
+ while [ $i -lt $MAX_CHECKS ]; do ((i++)); if kubectl get jobs $job_name -o jsonpath='Failed:{.status.failed}' | grep "Failed:1"; then status_code=1 && break; elif kubectl get jobs $job_name -o jsonpath='Succeeded:{.status.succeeded}' | grep "Succeeded:1" ; then status_code=0 && break; else echo "Job not finished yet"; fi; sleep 30; done && \
66
+ echo "Done waiting. Job status code: $status_code" && \
67
+ # Allow time for logs to flush.
68
+ sleep 30 && \
69
+ echo "JOB_NAME: $job_name" && \
70
+ gcloud logging read "resource.type=k8s_container resource.labels.project_id=$GOOGLE_PROJECT_ID resource.labels.location=$GOOGLE_COMPUTE_ZONE resource.labels.cluster_name=$GKE_CLUSTER resource.labels.namespace_name=default resource.labels.pod_name:$job_name" --limit 10000000 --order asc --format 'value(textPayload)' --project=$GOOGLE_PROJECT_ID > /tmp/full_output.txt && \
71
+ if grep -q '<?xml version="1.0" ?>' /tmp/full_output.txt ; then csplit /tmp/full_output.txt '/<?xml version="1.0" ?>/'; else mv /tmp/full_output.txt xx00; fi && \
72
+ # First portion is the test logs. Print these to the CircleCI job stdout.
73
+ cat xx00 && \
74
+ echo "Done with log retrieval attempt." && \
75
+ gcloud container images delete "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID" --force-delete-tags && \
76
+ exit $status_code
77
+
78
+ stats : &stats
79
+ run :
80
+ name : Statistics
81
+ command : |
82
+ mv ./xx01 coverage.xml
83
+ # TODO: add human readable report
84
+ cat coverage.xml
85
+ sudo pip install pycobertura
86
+ pycobertura show coverage.xml
37
87
38
88
jobs :
39
89
40
- Build-Docs :
90
+ TPU-tests :
91
+ docker :
92
+ - image : circleci/python:3.7
93
+ environment :
94
+ - MAX_CHECKS : 60
95
+ steps :
96
+ - checkout
97
+ - go/install
98
+ - *checkout_ml_testing
99
+ - gcp-gke/install
100
+ - gcp-gke/update-kubeconfig-with-credentials :
101
+ cluster : $GKE_CLUSTER
102
+ perform-login : true
103
+ - setup_remote_docker
104
+ - *build_push_docker
105
+ - *deploy_cluster
106
+ - *stats
107
+ - codecov/upload :
108
+ file : coverage.xml
109
+ flags : tpu,pytest
110
+ upload_name : TPU-coverage
111
+
112
+ - store_artifacts :
113
+ path : coverage.xml
114
+
115
+ build-Docs :
41
116
docker :
42
117
- image : readthedocs/build:latest
43
118
steps :
@@ -48,24 +123,9 @@ jobs:
48
123
path : docs/build/html/
49
124
destination : html
50
125
51
- CPU-Tests :
52
- # todo: to be replaced by TPU tests
53
- docker :
54
- - image : circleci/python:3.6
55
- environment :
56
- - TORCH_VERSION : " torch"
57
- steps : &steps
58
- - checkout
59
- - *install_deps
60
- - *tests
61
- - store_test_results :
62
- path : test-reports
63
- - store_artifacts :
64
- path : test-reports
65
-
66
126
workflows :
67
127
version : 2
68
- build :
128
+ tpu-tests :
69
129
jobs :
70
- - Build -Docs
71
- - CPU-Tests
130
+ - build -Docs
131
+ - TPU-tests
0 commit comments