From d5bbcac4fcd0db83de13b3999dd03f1e31c953ba Mon Sep 17 00:00:00 2001
From: Dmitry Balabanov <balabdmi@amazon.de>
Date: Thu, 10 Mar 2022 11:11:07 +0000
Subject: [PATCH 1/2] fix(tracer): change E2E tests waiting behavior

Instead of waiting for a fixed time, the tests now poll for results.
This fixes the false-negative results seen when traces need more time to arrive.
---
 package-lock.json                             | 38 +++++++-
 packages/tracing/package.json                 |  4 +-
 packages/tracing/tests/e2e/tracer.test.ts     | 32 +++----
 packages/tracing/tests/helpers/tracesUtils.ts | 86 +++++++++++--------
 4 files changed, 104 insertions(+), 56 deletions(-)

diff --git a/package-lock.json b/package-lock.json
index e30b6597fb..77f46282fd 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -4781,6 +4781,21 @@
       "integrity": "sha512-ReVR2rLTV1kvtlWFyuot+d1pkpG2Fw/XKE3PDAdj57rbM97ttSp9JZ2UsP+2EHTylra9cUf6JA7tGwW1INzUrA==",
       "dev": true
     },
+    "node_modules/@types/promise-retry": {
+      "version": "1.1.3",
+      "resolved": "https://registry.npmjs.org/@types/promise-retry/-/promise-retry-1.1.3.tgz",
+      "integrity": "sha512-LxIlEpEX6frE3co3vCO2EUJfHIta1IOmhDlcAsR4GMMv9hev1iTI9VwberVGkePJAuLZs5rMucrV8CziCfuJMw==",
+      "dev": true,
+      "dependencies": {
+        "@types/retry": "*"
+      }
+    },
+    "node_modules/@types/retry": {
+      "version": "0.12.1",
+      "resolved": "https://registry.npmjs.org/@types/retry/-/retry-0.12.1.tgz",
+      "integrity": "sha512-xoDlM2S4ortawSWORYqsdU+2rxdh4LRW9ytc3zmT37RIKQh6IHyKwwtKhKis9ah8ol07DCkZxPt8BBvPjC6v4g==",
+      "dev": true
+    },
     "node_modules/@types/stack-utils": {
       "version": "2.0.1",
       "resolved": "https://registry.npmjs.org/@types/stack-utils/-/stack-utils-2.0.1.tgz",
@@ -16066,7 +16081,9 @@
         "aws-xray-sdk-core": "^3.3.3"
       },
       "devDependencies": {
-        "@aws-sdk/client-dynamodb": "^3.52.0"
+        "@aws-sdk/client-dynamodb": "^3.52.0",
+        "@types/promise-retry": "^1.1.3",
+        "promise-retry": "^2.0.1"
       }
     }
   },
@@ -16765,7 +16782,9 @@
       "requires": {
         "@aws-lambda-powertools/commons": "^0.7.0",
         "@aws-sdk/client-dynamodb": "^3.52.0",
-        "aws-xray-sdk-core": "^3.3.3"
+        "@types/promise-retry": "^1.1.3",
+        "aws-xray-sdk-core": "^3.3.3",
+        "promise-retry": "^2.0.1"
       }
     },
     "@aws-sdk/abort-controller": {
@@ -19794,6 +19813,21 @@
       "integrity": "sha512-ReVR2rLTV1kvtlWFyuot+d1pkpG2Fw/XKE3PDAdj57rbM97ttSp9JZ2UsP+2EHTylra9cUf6JA7tGwW1INzUrA==",
       "dev": true
     },
+    "@types/promise-retry": {
+      "version": "1.1.3",
+      "resolved": "https://registry.npmjs.org/@types/promise-retry/-/promise-retry-1.1.3.tgz",
+      "integrity": "sha512-LxIlEpEX6frE3co3vCO2EUJfHIta1IOmhDlcAsR4GMMv9hev1iTI9VwberVGkePJAuLZs5rMucrV8CziCfuJMw==",
+      "dev": true,
+      "requires": {
+        "@types/retry": "*"
+      }
+    },
+    "@types/retry": {
+      "version": "0.12.1",
+      "resolved": "https://registry.npmjs.org/@types/retry/-/retry-0.12.1.tgz",
+      "integrity": "sha512-xoDlM2S4ortawSWORYqsdU+2rxdh4LRW9ytc3zmT37RIKQh6IHyKwwtKhKis9ah8ol07DCkZxPt8BBvPjC6v4g==",
+      "dev": true
+    },
     "@types/stack-utils": {
       "version": "2.0.1",
       "resolved": "https://registry.npmjs.org/@types/stack-utils/-/stack-utils-2.0.1.tgz",
diff --git a/packages/tracing/package.json b/packages/tracing/package.json
index 6937244309..04ee233937 100644
--- a/packages/tracing/package.json
+++ b/packages/tracing/package.json
@@ -30,7 +30,9 @@
   "main": "./lib/index.js",
   "types": "./lib/index.d.ts",
   "devDependencies": {
-    "@aws-sdk/client-dynamodb": "^3.52.0"
+    "@aws-sdk/client-dynamodb": "^3.52.0",
+    "@types/promise-retry": "^1.1.3",
+    "promise-retry": "^2.0.1"
   },
   "files": [
     "lib"
diff --git a/packages/tracing/tests/e2e/tracer.test.ts b/packages/tracing/tests/e2e/tracer.test.ts
index a1002e57cd..de988d2658 100644
--- a/packages/tracing/tests/e2e/tracer.test.ts
+++ b/packages/tracing/tests/e2e/tracer.test.ts
@@ -4,7 +4,7 @@
  * @group e2e/tracer/manual
  */
 
-import { randomUUID } from 'crypto';
+import { randomUUID, randomBytes } from 'crypto';
 import { join } from 'path';
 import { Tracing, Architecture } from '@aws-cdk/aws-lambda';
 import { NodejsFunction } from '@aws-cdk/aws-lambda-nodejs';
@@ -35,7 +35,7 @@ describe('Tracer integration tests', () => {
 
   let integTestApp: App;
   let stack: Stack;
-  const invocationsMap: { [key: string]: { serviceName: string; resourceArn: string } } = {};
+  const invocationsMap: { [key: string]: { serviceName: string; functionName: string; resourceArn: string } } = {};
 
   beforeAll(async () => {
 
@@ -70,10 +70,11 @@ describe('Tracer integration tests', () => {
     for (const functionName of functions) {
       const expectedServiceName = randomUUID();
       const fileName = functionName.split('-')[0];
+      const functionInstanceName = `${functionName}-${randomBytes(12).toString('hex')}`;
       const fn = new NodejsFunction(stack, functionName, {
         entry: join(__dirname, `tracer.test.${fileName}.ts`),
         handler: 'handler',
-        functionName: functionName,
+        functionName: functionInstanceName,
         tracing: Tracing.ACTIVE,
         architecture: Architecture.X86_64,
         memorySize: 256,
@@ -95,7 +96,8 @@ describe('Tracer integration tests', () => {
       table.grantWriteData(fn);
       invocationsMap[functionName] = {
         serviceName: expectedServiceName,
-        resourceArn: `arn:aws:lambda:${region}:${account}:function:${functionName}`, // ARN is still a token at this point, so we construct the ARN manually
+        functionName: functionInstanceName,
+        resourceArn: `arn:aws:lambda:${region}:${account}:function:${functionInstanceName}`, // ARN is still a token at this point, so we construct the ARN manually
       };
     }
 
@@ -111,10 +113,11 @@ describe('Tracer integration tests', () => {
     });
 
     // Act
-    Object.keys(invocationsMap).forEach(async (functionName) => {
+    Object.values(invocationsMap).forEach(async ({ functionName }) => {
       for (let i = 0; i < invocations; i++) {
         await lambdaClient.invoke({
           FunctionName: functionName,
+          LogType: 'Tail',
           Payload: JSON.stringify({
             throw: i === invocations - 1 ? true : false, // only last invocation should throw
             sdkV2: i === 1 ? 'all' : 'client', // only second invocation should use captureAll
@@ -124,9 +127,6 @@ describe('Tracer integration tests', () => {
       }
     });
     
-    // sleep to allow for traces to be collected
-    await new Promise((resolve) => setTimeout(resolve, ONE_MINUTE * 2));
-
   }, ONE_MINUTE * 5);
 
   afterAll(async () => {
@@ -154,7 +154,7 @@ describe('Tracer integration tests', () => {
     
     // Assess
     // Retrieve traces from X-Ray using Resource ARN as filter
-    const sortedTraces = await getTraces(xray, startTime, resourceArn, invocations);
+    const sortedTraces = await getTraces(xray, startTime, resourceArn, invocations, 4);
 
     for (let i = 0; i < invocations; i++) {
       // Assert that the trace has the expected amount of segments
@@ -224,7 +224,7 @@ describe('Tracer integration tests', () => {
 
     // Assess
     // Retrieve traces from X-Ray using Resource ARN as filter
-    const sortedTraces = await getTraces(xray, startTime, resourceArn, invocations);
+    const sortedTraces = await getTraces(xray, startTime, resourceArn, invocations, 4);
 
     for (let i = 0; i < invocations; i++) {
       // Assert that the trace has the expected amount of segments
@@ -294,7 +294,7 @@ describe('Tracer integration tests', () => {
 
     // Assess
     // Retrieve traces from X-Ray using Resource ARN as filter
-    const sortedTraces = await getTraces(xray, startTime, resourceArn, invocations);
+    const sortedTraces = await getTraces(xray, startTime, resourceArn, invocations, 4);
 
     for (let i = 0; i < invocations; i++) {
       // Assert that the trace has the expected amount of segments
@@ -362,7 +362,7 @@ describe('Tracer integration tests', () => {
     
     // Assess
     // Retrieve traces from X-Ray using Resource ARN as filter
-    const sortedTraces = await getTraces(xray, startTime, resourceArn, invocations);
+    const sortedTraces = await getTraces(xray, startTime, resourceArn, invocations, 2);
 
     for (let i = 0; i < invocations; i++) {
       // Assert that the trace has the expected amount of segments
@@ -387,7 +387,7 @@ describe('Tracer integration tests', () => {
     
     // Assess
     // Retrieve traces from X-Ray using Resource ARN as filter
-    const sortedTraces = await getTraces(xray, startTime, resourceArn, invocations);
+    const sortedTraces = await getTraces(xray, startTime, resourceArn, invocations, 4);
 
     for (let i = 0; i < invocations; i++) {
       // Assert that the trace has the expected amount of segments
@@ -484,7 +484,7 @@ describe('Tracer integration tests', () => {
     
     // Assess
     // Retrieve traces from X-Ray using Resource ARN as filter
-    const sortedTraces = await getTraces(xray, startTime, resourceArn, invocations);
+    const sortedTraces = await getTraces(xray, startTime, resourceArn, invocations, 4);
 
     for (let i = 0; i < invocations; i++) {
       // Assert that the trace has the expected amount of segments
@@ -581,7 +581,7 @@ describe('Tracer integration tests', () => {
     
     // Assess
     // Retrieve traces from X-Ray using Resource ARN as filter
-    const sortedTraces = await getTraces(xray, startTime, resourceArn, invocations);
+    const sortedTraces = await getTraces(xray, startTime, resourceArn, invocations, 4);
 
     for (let i = 0; i < invocations; i++) {
       // Assert that the trace has the expected amount of segments
@@ -666,7 +666,7 @@ describe('Tracer integration tests', () => {
     
     // Assess
     // Retrieve traces from X-Ray using Resource ARN as filter
-    const sortedTraces = await getTraces(xray, startTime, resourceArn, invocations);
+    const sortedTraces = await getTraces(xray, startTime, resourceArn, invocations, 2);
 
     for (let i = 0; i < invocations; i++) {
       // Assert that the trace has the expected amount of segments
diff --git a/packages/tracing/tests/helpers/tracesUtils.ts b/packages/tracing/tests/helpers/tracesUtils.ts
index 576fdf0fd6..31f1b795f9 100644
--- a/packages/tracing/tests/helpers/tracesUtils.ts
+++ b/packages/tracing/tests/helpers/tracesUtils.ts
@@ -1,4 +1,5 @@
 import { XRay } from 'aws-sdk';
+import promiseRetry from 'promise-retry';
 
 interface ParsedDocument {
   name: string
@@ -57,48 +58,59 @@ interface ParsedTrace {
   Segments: ParsedSegment[]
 }
 
-const getTraces = async (xrayClient: XRay, startTime: Date, resourceArn: string, expectedTraces: number): Promise<ParsedTrace[]> => {
-  const endTime = new Date();
-  console.log(`Manual query: aws xray get-trace-summaries --start-time ${Math.floor(startTime.getTime()/1000)} --end-time ${Math.floor(endTime.getTime()/1000)} --filter-expression 'resource.arn = "${resourceArn}"'`);
-  const traces = await xrayClient
-    .getTraceSummaries({
-      StartTime: startTime,
-      EndTime: endTime,
-      FilterExpression: `resource.arn = "${resourceArn}"`,
-    })
-    .promise();
-
-  if (traces.TraceSummaries?.length !== expectedTraces) {
-    throw new Error(`Expected ${expectedTraces} traces, got ${traces.TraceSummaries?.length} for ${resourceArn}`);
-  }
+const getTraces = async (xrayClient: XRay, startTime: Date, resourceArn: string, expectedTraces: number, expectedSegments: number): Promise<ParsedTrace[]> => {
+  const retryOptions = { retries: 20, minTimeout: 5_000, maxTimeout: 10_000, factor: 1.25 };
 
-  const traceDetails = await xrayClient.batchGetTraces({
-    TraceIds: traces.TraceSummaries?.map((traceSummary) => traceSummary?.Id) as XRay.TraceIdList,
-  }).promise();
+  return promiseRetry(async(retry: (err?: Error) => never , _: number) => {
 
-  if (traceDetails.Traces?.length !== expectedTraces) {
-    throw new Error(`Expected ${expectedTraces} trace summaries, got ${traceDetails.Traces?.length} for ${resourceArn}`);
-  }
+    const endTime = new Date();
+    console.log(`Manual query: aws xray get-trace-summaries --start-time ${Math.floor(startTime.getTime() / 1000)} --end-time ${Math.floor(endTime.getTime() / 1000)} --filter-expression 'resource.arn = "${resourceArn}"'`);
+    const traces = await xrayClient
+      .getTraceSummaries({
+        StartTime: startTime,
+        EndTime: endTime,
+        FilterExpression: `resource.arn = "${resourceArn}"`,
+      })
+      .promise();
 
-  const sortedTraces = traceDetails.Traces?.map((trace): ParsedTrace => ({
-    Duration: trace?.Duration as number,
-    Id: trace?.Id as string,
-    LimitExceeded: trace?.LimitExceeded as boolean,
-    Segments: trace.Segments?.map((segment) => ({
-      Document: JSON.parse(segment?.Document as string) as ParsedDocument,
-      Id: segment.Id as string,
-    })).sort((a, b) => a.Document.start_time - b.Document.start_time) as ParsedSegment[],
-  })).sort((a, b) => a.Segments[0].Document.start_time - b.Segments[0].Document.start_time);
-
-  if (sortedTraces === undefined) {
-    throw new Error(`Traces are undefined for ${resourceArn}`);
-  }
+    if (traces.TraceSummaries?.length !== expectedTraces) {
+      retry(new Error(`Expected ${expectedTraces} traces, got ${traces.TraceSummaries?.length} for ${resourceArn}`));
+    }
 
-  if (sortedTraces.length !== expectedTraces) {
-    throw new Error(`Expected ${expectedTraces} sorted traces, but got ${sortedTraces.length} for ${resourceArn}`);
-  }
+    const traceDetails = await xrayClient.batchGetTraces({
+      TraceIds: traces.TraceSummaries?.map((traceSummary) => traceSummary?.Id) as XRay.TraceIdList,
+    }).promise();
+
+    if (traceDetails.Traces?.length !== expectedTraces) {
+      retry(new Error(`Expected ${expectedTraces} trace summaries, got ${traceDetails.Traces?.length} for ${resourceArn}`));
+    }
+
+    const sortedTraces = traceDetails.Traces?.map((trace): ParsedTrace => ({
+      Duration: trace?.Duration as number,
+      Id: trace?.Id as string,
+      LimitExceeded: trace?.LimitExceeded as boolean,
+      Segments: trace.Segments?.map((segment) => ({
+        Document: JSON.parse(segment?.Document as string) as ParsedDocument,
+        Id: segment.Id as string,
+      })).sort((a, b) => a.Document.start_time - b.Document.start_time) as ParsedSegment[],
+    })).sort((a, b) => a.Segments[0].Document.start_time - b.Segments[0].Document.start_time);
+
+    if (sortedTraces === undefined) {
+      throw new Error(`Traces are undefined for ${resourceArn}`);
+    }
+
+    if (sortedTraces.length !== expectedTraces) {
+      throw new Error(`Expected ${expectedTraces} sorted traces, but got ${sortedTraces.length} for ${resourceArn}`);
+    }
+
+    sortedTraces.forEach((trace) => {
+      if (trace.Segments?.length != expectedSegments) {
+        retry(new Error(`Expected ${expectedSegments} segments, got ${trace.Segments?.length} for trace id ${trace.Id}`));
+      }
+    });
 
-  return sortedTraces;
+    return sortedTraces;
+  }, retryOptions);
 };
 
 const getFunctionSegment = (trace: ParsedTrace): ParsedSegment => {

From 4eb9d5c3a98a24f82cc7ba0b7911dee68417b3a2 Mon Sep 17 00:00:00 2001
From: Dmitry Balabanov <balabdmi@amazon.de>
Date: Thu, 10 Mar 2022 15:12:31 +0000
Subject: [PATCH 2/2] fix(metrics): change E2E tests waiting behavior

Instead of waiting for a fixed time, the tests now poll for metrics.
This fixes the false-negative results seen when data needs more time to arrive.
---
 packages/metrics/package.json                 |  4 ++
 packages/metrics/tests/e2e/decorator.test.ts  | 38 ++++++++-----------
 .../tests/e2e/standardFunctions.test.ts       | 35 ++++++++---------
 .../metrics/tests/helpers/metricsUtils.ts     | 24 ++++++++++++
 4 files changed, 58 insertions(+), 43 deletions(-)
 create mode 100644 packages/metrics/tests/helpers/metricsUtils.ts

diff --git a/packages/metrics/package.json b/packages/metrics/package.json
index b36c4abf22..9e0d1533cf 100644
--- a/packages/metrics/package.json
+++ b/packages/metrics/package.json
@@ -50,6 +50,10 @@
   "main": "./lib/index.js",
   "types": "./lib/index.d.ts",
   "typedocMain": "src/index.ts",
+  "devDependencies": {
+    "@types/promise-retry": "^1.1.3",
+    "promise-retry": "^2.0.1"
+  },
   "files": [
     "lib"
   ],
diff --git a/packages/metrics/tests/e2e/decorator.test.ts b/packages/metrics/tests/e2e/decorator.test.ts
index eca5988752..9397ec221b 100644
--- a/packages/metrics/tests/e2e/decorator.test.ts
+++ b/packages/metrics/tests/e2e/decorator.test.ts
@@ -15,6 +15,9 @@ import { SdkProvider } from 'aws-cdk/lib/api/aws-auth';
 import { CloudFormationDeployments } from 'aws-cdk/lib/api/cloudformation-deployments';
 import * as AWS from 'aws-sdk';
 import { MetricUnits } from '../../src';
+import { getMetrics } from '../helpers/metricsUtils';
+
+const ONE_MINUTE = 1000 * 60;
 
 const cloudwatchClient = new AWS.CloudWatch();
 const lambdaClient = new AWS.Lambda();
@@ -80,10 +83,7 @@ describe('happy cases', () => {
         .promise();
     }
 
-    // THEN
-    // sleep to allow metrics to be collected
-    await new Promise((resolve) => setTimeout(resolve, 15000));
-  }, 200000);
+  }, ONE_MINUTE * 3);
 
   it('capture ColdStart Metric', async () => {
     const expectedDimensions = [
@@ -92,12 +92,8 @@ describe('happy cases', () => {
       { Name: Object.keys(expectedDefaultDimensions)[0], Value: expectedDefaultDimensions.MyDimension },
     ];
     // Check coldstart metric dimensions
-    const coldStartMetrics = await cloudwatchClient
-      .listMetrics({
-        Namespace: expectedNamespace,
-        MetricName: 'ColdStart',
-      })
-      .promise();
+    const coldStartMetrics = await getMetrics(cloudwatchClient, expectedNamespace, 'ColdStart', 1);
+
     expect(coldStartMetrics.Metrics?.length).toBe(1);
     const coldStartMetric = coldStartMetrics.Metrics?.[0];
     expect(coldStartMetric?.Dimensions).toStrictEqual(expectedDimensions);
@@ -124,16 +120,12 @@ describe('happy cases', () => {
     // Despite lambda has been called twice, coldstart metric sum should only be 1
     const singleDataPoint = coldStartMetricStat.Datapoints ? coldStartMetricStat.Datapoints[0] : {};
     expect(singleDataPoint?.Sum).toBe(1);
-  }, 15000);
+  }, ONE_MINUTE * 3);
 
   it('produce added Metric with the default and extra one dimensions', async () => {
     // Check metric dimensions
-    const metrics = await cloudwatchClient
-      .listMetrics({
-        Namespace: expectedNamespace,
-        MetricName: expectedMetricName,
-      })
-      .promise();
+    const metrics = await getMetrics(cloudwatchClient, expectedNamespace, expectedMetricName, 1);
+
     expect(metrics.Metrics?.length).toBe(1);
     const metric = metrics.Metrics?.[0];
     const expectedDimensions = [
@@ -144,16 +136,16 @@ describe('happy cases', () => {
     expect(metric?.Dimensions).toStrictEqual(expectedDimensions);
 
     // Check coldstart metric value
-    const adjustedStartTime = new Date(startTime.getTime() - 60 * 1000);
-    const endTime = new Date(new Date().getTime() + 60 * 1000);
+    const adjustedStartTime = new Date(startTime.getTime() - 3 * ONE_MINUTE);
+    const endTime = new Date(new Date().getTime() + ONE_MINUTE);
     console.log(`Manual command: aws cloudwatch get-metric-statistics --namespace ${expectedNamespace} --metric-name ${expectedMetricName} --start-time ${Math.floor(adjustedStartTime.getTime()/1000)} --end-time ${Math.floor(endTime.getTime()/1000)} --statistics 'Sum' --period 60 --dimensions '${JSON.stringify(expectedDimensions)}'`);
     const metricStat = await cloudwatchClient
       .getMetricStatistics(
         {
           Namespace: expectedNamespace,
-          StartTime: new Date(startTime.getTime() - 60 * 1000), // minus 1 minute,
+          StartTime: adjustedStartTime,
           Dimensions: expectedDimensions,
-          EndTime: new Date(new Date().getTime() + 60 * 1000),
+          EndTime: endTime,
           Period: 60,
           MetricName: expectedMetricName,
           Statistics: ['Sum'],
@@ -165,7 +157,7 @@ describe('happy cases', () => {
     // Since lambda has been called twice in this test and potentially more in others, metric sum should be at least of expectedMetricValue * invocationCount
     const singleDataPoint = metricStat.Datapoints ? metricStat.Datapoints[0] : {};
     expect(singleDataPoint?.Sum).toBeGreaterThanOrEqual(parseInt(expectedMetricValue) * invocationCount);
-  }, 15000);
+  }, ONE_MINUTE * 3);
 
   afterAll(async () => {
     if (!process.env.DISABLE_TEARDOWN) {
@@ -181,5 +173,5 @@ describe('happy cases', () => {
         quiet: true,
       });
     }
-  }, 200000);
+  }, ONE_MINUTE * 3);
 });
diff --git a/packages/metrics/tests/e2e/standardFunctions.test.ts b/packages/metrics/tests/e2e/standardFunctions.test.ts
index cf970277f6..975a5a3652 100644
--- a/packages/metrics/tests/e2e/standardFunctions.test.ts
+++ b/packages/metrics/tests/e2e/standardFunctions.test.ts
@@ -15,6 +15,9 @@ import { SdkProvider } from 'aws-cdk/lib/api/aws-auth';
 import { CloudFormationDeployments } from 'aws-cdk/lib/api/cloudformation-deployments';
 import * as AWS from 'aws-sdk';
 import { MetricUnits } from '../../src';
+import { getMetrics } from '../helpers/metricsUtils';
+
+const ONE_MINUTE = 1000 * 60;
 
 const cloudwatchClient = new AWS.CloudWatch();
 const lambdaClient = new AWS.Lambda();
@@ -83,16 +86,12 @@ describe('happy cases', () => {
     // THEN
     // sleep to allow metrics to be collected
     await new Promise((resolve) => setTimeout(resolve, 15000));
-  }, 200000);
+  }, ONE_MINUTE * 3);
 
   it('capture ColdStart Metric', async () => {
     // Check coldstart metric dimensions
-    const coldStartMetrics = await cloudwatchClient
-      .listMetrics({
-        Namespace: expectedNamespace,
-        MetricName: 'ColdStart',
-      })
-      .promise();
+    const coldStartMetrics = await getMetrics(cloudwatchClient, expectedNamespace, 'ColdStart', 1);
+
     expect(coldStartMetrics.Metrics?.length).toBe(1);
     const coldStartMetric = coldStartMetrics.Metrics?.[0];
     expect(coldStartMetric?.Dimensions).toStrictEqual([{ Name: 'service', Value: expectedServiceName }]);
@@ -119,16 +118,12 @@ describe('happy cases', () => {
     // Despite lambda has been called twice, coldstart metric sum should only be 1
     const singleDataPoint = coldStartMetricStat.Datapoints ? coldStartMetricStat.Datapoints[0] : {};
     expect(singleDataPoint?.Sum).toBe(1);
-  }, 15000);
+  }, ONE_MINUTE * 3);
 
   it('produce added Metric with the default and extra one dimensions', async () => {
     // Check metric dimensions
-    const metrics = await cloudwatchClient
-      .listMetrics({
-        Namespace: expectedNamespace,
-        MetricName: expectedMetricName,
-      })
-      .promise();
+    const metrics = await getMetrics(cloudwatchClient, expectedNamespace, expectedMetricName, 1);
+
     expect(metrics.Metrics?.length).toBe(1);
     const metric = metrics.Metrics?.[0];
     const expectedDimensions = [
@@ -139,16 +134,16 @@ describe('happy cases', () => {
     expect(metric?.Dimensions).toStrictEqual(expectedDimensions);
 
     // Check coldstart metric value
-    const adjustedStartTime = new Date(startTime.getTime() - 60 * 1000);
-    const endTime = new Date(new Date().getTime() + 60 * 1000);
+    const adjustedStartTime = new Date(startTime.getTime() - 3 * ONE_MINUTE);
+    const endTime = new Date(new Date().getTime() + ONE_MINUTE);
     console.log(`Manual command: aws cloudwatch get-metric-statistics --namespace ${expectedNamespace} --metric-name ${expectedMetricName} --start-time ${Math.floor(adjustedStartTime.getTime()/1000)} --end-time ${Math.floor(endTime.getTime()/1000)} --statistics 'Sum' --period 60 --dimensions '${JSON.stringify(expectedDimensions)}'`);
     const metricStat = await cloudwatchClient
       .getMetricStatistics(
         {
           Namespace: expectedNamespace,
-          StartTime: new Date(startTime.getTime() - 60 * 1000), // minus 1 minute,
+          StartTime: adjustedStartTime,
           Dimensions: expectedDimensions,
-          EndTime: new Date(new Date().getTime() + 60 * 1000),
+          EndTime: endTime,
           Period: 60,
           MetricName: expectedMetricName,
           Statistics: ['Sum'],
@@ -160,7 +155,7 @@ describe('happy cases', () => {
     // Since lambda has been called twice in this test and potentially more in others, metric sum should be at least of expectedMetricValue * invocationCount
     const singleDataPoint = metricStat.Datapoints ? metricStat.Datapoints[0] : {};
     expect(singleDataPoint.Sum).toBeGreaterThanOrEqual(parseInt(expectedMetricValue) * invocationCount);
-  }, 15000);
+  }, ONE_MINUTE * 3);
 
   afterAll(async () => {
     if (!process.env.DISABLE_TEARDOWN) {
@@ -176,5 +171,5 @@ describe('happy cases', () => {
         quiet: true,
       });
     }
-  }, 200000);
+  }, ONE_MINUTE * 3);
 });
diff --git a/packages/metrics/tests/helpers/metricsUtils.ts b/packages/metrics/tests/helpers/metricsUtils.ts
new file mode 100644
index 0000000000..d585b777a0
--- /dev/null
+++ b/packages/metrics/tests/helpers/metricsUtils.ts
@@ -0,0 +1,24 @@
+import { CloudWatch } from 'aws-sdk';
+import promiseRetry from 'promise-retry';
+
+const getMetrics = async (cloudWatchClient: CloudWatch, namespace: string, metric: string, expectedMetrics: number): Promise<CloudWatch.ListMetricsOutput> => {
+  const retryOptions = { retries: 20, minTimeout: 5_000, maxTimeout: 10_000, factor: 1.25 };
+
+  return promiseRetry(async (retry: (err?: Error) => never, _: number) => {
+
+    const result = await cloudWatchClient
+      .listMetrics({
+        Namespace: namespace,
+        MetricName: metric,
+      })
+      .promise();
+
+    if (result.Metrics?.length !== expectedMetrics) {
+      retry(new Error(`Expected ${expectedMetrics} metrics, got ${result.Metrics?.length} for ${namespace}.${metric}`));
+    }
+
+    return result;
+  }, retryOptions);
+};
+
+export { getMetrics };
\ No newline at end of file