Skip to content

Commit 40964ae

Browse files
authored
Merge pull request #234 from jsitu777/release-v1.2.0
[Cherry-pick] Enable Cloudwatch metrics for Canary Runs
2 parents 2e8ccd1 + 6a43447 commit 40964ae

File tree

3 files changed

+118
-3
lines changed

3 files changed

+118
-3
lines changed

test/canary/canary.buildspec.yaml

+9-2
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,15 @@ phases:
1919
commands:
2020
# Run tests
2121
- docker run --name ack-canary $(env | cut -f1 -d= | sed 's/^/-e /') --mount type=bind,source="$(pwd)/",target="/${SERVICE}-controller/" ${ECR_CACHE_URI}:latest
22-
22+
23+
post_build:
  commands:
    # Copy the JUnit report out of the test container so the `reports`
    # section can pick it up. The path follows the bind-mount target used by
    # `docker run` (/${SERVICE}-controller/) instead of hard-coding one
    # service name. `|| true` keeps the phase going when no report exists.
    - docker cp ack-canary:/${SERVICE}-controller/test/canary/integration_tests.xml /tmp/results.xml || true
    # Push test image to cache ECR repo
    - docker push ${ECR_CACHE_URI}:latest || true
2528

26-
29+
# Test report: read results.xml from /tmp, where the post_build phase
# copies it out of the test container.
reports:
  IntegrationTestReport:
    base-directory: "/tmp"
    files:
      - "results.xml"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
import boto3
2+
from datetime import datetime
3+
import xml.etree.ElementTree as ET
4+
import os
5+
6+
7+
xml_path = "../integration_tests.xml"
8+
9+
def _read_test_counts(report_path):
    """Return ``(tests, failures)`` totals read from a JUnit XML report.

    Handles both report layouts: a ``<testsuites>`` root wrapping one or more
    ``<testsuite>`` children, and a bare ``<testsuite>`` root. Tests listed
    under the ``errors`` attribute are counted as failures so that they are
    never reported as successes.

    Returns ``(0, 0)`` when the report cannot be parsed or holds no suite.
    """
    try:
        root = ET.parse(report_path).getroot()
    except (OSError, ET.ParseError) as err:
        print(f"Could not read {report_path}: {err}")
        return 0, 0

    suites = [root] if root.tag == "testsuite" else root.findall("testsuite")
    if not suites:
        print(f"{report_path} does not contain a testsuite element.")
        return 0, 0

    tests = sum(int(suite.get("tests", 0)) for suite in suites)
    failures = sum(
        int(suite.get("failures", 0)) + int(suite.get("errors", 0))
        for suite in suites
    )
    # A single test may be counted under both `failures` and `errors`;
    # clamp so the derived success count can never go negative.
    return tests, min(failures, tests)


def readXML_and_publish_metrics_to_cw():
    """Read the canary JUnit report and publish its totals to CloudWatch.

    Reads the report at the module-level ``xml_path``. When the file is
    missing (or unreadable) all counts are published as zero. Four metrics
    are sent to the ``Canary_Metrics`` namespace -- ``failures``,
    ``total_tests``, ``successes`` and ``success_rate`` -- each with a
    ``CodeBuild Project Name`` dimension taken from the ``PROJECT_NAME``
    environment variable.

    Returns:
        int: ``200`` when CloudWatch accepted the data.

    Raises:
        Exception: if ``put_metric_data`` fails or answers with a non-200
            status code; the error is printed and then re-raised.
    """
    # Local import: the file only imports `datetime` from the datetime module.
    from datetime import timezone

    if os.path.isfile(xml_path):
        tests, failures = _read_test_counts(xml_path)
    else:
        print(f"{xml_path} does not exist.")
        print(os.getcwd())
        tests, failures = 0, 0

    successes = tests - failures
    # Guard against an empty report (tests == 0) to avoid ZeroDivisionError.
    success_rate = (successes / tests) * 100 if tests else 0

    # Timezone-aware UTC timestamp, so the datapoint does not depend on the
    # build host's local timezone.
    timestamp = datetime.now(timezone.utc)

    print(f"Failures: {failures}")
    print(f"Total tests: {tests}")
    print(f"Success: {successes}")

    # push to cloudwatch
    cw_client = boto3.client("cloudwatch")
    project_name = os.getenv("PROJECT_NAME")
    dimensions = [{"Name": "CodeBuild Project Name", "Value": project_name}]

    # Define the metric data: (metric name, value, unit)
    metrics = (
        ("failures", failures, "Count"),
        ("total_tests", tests, "Count"),
        ("successes", successes, "Count"),
        # The rate is truncated to a whole percent, as before.
        ("success_rate", success_rate, "Percent"),
    )
    metric_data = [
        {
            "MetricName": name,
            "Timestamp": timestamp,
            "Dimensions": dimensions,
            "Value": int(value),
            "Unit": unit,
        }
        for name, value, unit in metrics
    ]

    # Use the put_metric_data method to push the metric data to CloudWatch
    try:
        response = cw_client.put_metric_data(
            Namespace="Canary_Metrics", MetricData=metric_data
        )
        status = response["ResponseMetadata"]["HTTPStatusCode"]
        if status != 200:
            # raise exception if the status code is not 200
            raise Exception(
                "Unexpected response status code: {}".format(status)
            )
    except Exception as e:
        print("Error pushing data to CloudWatch: {}".format(e))
        # re-raise so the caller sees the failure
        raise

    print("Successfully pushed data to CloudWatch")
    # return 200 status code if successful
    return 200
95+
96+
97+
def main():
    """Script entry point: read the test report and publish its metrics."""
    readXML_and_publish_metrics_to_cw()


if __name__ == "__main__":
    main()

test/canary/scripts/run_test.sh

+7-1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,11 @@ function print_controller_logs() {
2828
}
2929

3030
function cleanup {
31+
# Push metrics to CloudWatch
32+
echo "Pushing Codebuild stats to Cloudwatch."
33+
cd $SCRIPTS_DIR
34+
python push_stats_to_cloudwatch.py
35+
3136
echo "Cleaning up resources"
3237
set +e
3338
kubectl delete monitoringschedules --all
@@ -66,6 +71,7 @@ function cleanup {
6671
}
6772
trap cleanup EXIT
6873

74+
6975
# Update kubeconfig
7076
aws --region $CLUSTER_REGION eks update-kubeconfig --name $CLUSTER_NAME
7177

@@ -87,7 +93,7 @@ pushd $E2E_DIR
8793

8894
# run tests
8995
echo "Run Tests"
90-
pytest_args=( -n 15 --dist loadfile --log-cli-level INFO )
96+
pytest_args=( -n 15 --dist loadfile --log-cli-level INFO --junitxml ../canary/integration_tests.xml)
9197
if [[ $SERVICE_REGION =~ ^(eu-north-1|eu-west-3)$ ]]; then
9298
# If select_regions_1 true we run the notebook_instance test
9399
pytest_args+=(-m "canary or select_regions_1")

0 commit comments

Comments
 (0)