From 0f436e14691835a985e04639dfff07a8c44934c8 Mon Sep 17 00:00:00 2001 From: Takashi Matsuo Date: Sat, 30 May 2020 08:40:42 +0000 Subject: [PATCH 1/5] [dlp] fix: mitigate flakiness * make the Pub/Sub fixture function level * shorten the timeout for the tests from 300 secs to 30 secs * retrying all the tests in risk_test.py 3 times fixes #3897 fixes #3896 fixes #3895 fixes #3894 fixes #3893 fixes #3892 fixes #3890 fixes #3889 --- dlp/risk_test.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/dlp/risk_test.py b/dlp/risk_test.py index ef2b0357dc6..156c6ea7549 100644 --- a/dlp/risk_test.py +++ b/dlp/risk_test.py @@ -38,7 +38,7 @@ # Create new custom topic/subscription -@pytest.fixture(scope="module") +@pytest.fixture(scope="function") def topic_id(): # Creates a pubsub topic, and tears it down. publisher = google.cloud.pubsub.PublisherClient() @@ -53,7 +53,7 @@ def topic_id(): publisher.delete_topic(topic_path) -@pytest.fixture(scope="module") +@pytest.fixture(scope="function") def subscription_id(topic_id): # Subscribes to a topic. 
subscriber = google.cloud.pubsub.SubscriberClient() @@ -160,7 +160,7 @@ def bigquery_project(): bigquery_client.delete_dataset(dataset_ref, delete_contents=True) -@pytest.mark.flaky +@pytest.mark.flaky(max_runs=3, min_passes=1) def test_numerical_risk_analysis( topic_id, subscription_id, bigquery_project, capsys ): @@ -172,13 +172,14 @@ def test_numerical_risk_analysis( NUMERIC_FIELD, topic_id, subscription_id, + timeout=30, ) out, _ = capsys.readouterr() assert "Value Range:" in out -@pytest.mark.flaky +@pytest.mark.flaky(max_runs=3, min_passes=1) def test_categorical_risk_analysis_on_string_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -190,14 +191,14 @@ def test_categorical_risk_analysis_on_string_field( UNIQUE_FIELD, topic_id, subscription_id, - timeout=180, + timeout=30, ) out, _ = capsys.readouterr() assert "Most common value occurs" in out -@pytest.mark.flaky +@pytest.mark.flaky(max_runs=3, min_passes=1) def test_categorical_risk_analysis_on_number_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -209,13 +210,14 @@ def test_categorical_risk_analysis_on_number_field( NUMERIC_FIELD, topic_id, subscription_id, + timeout=30, ) out, _ = capsys.readouterr() assert "Most common value occurs" in out -@pytest.mark.flaky +@pytest.mark.flaky(max_runs=3, min_passes=1) def test_k_anonymity_analysis_single_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -227,6 +229,7 @@ def test_k_anonymity_analysis_single_field( topic_id, subscription_id, [NUMERIC_FIELD], + timeout=30, ) out, _ = capsys.readouterr() @@ -246,6 +249,7 @@ def test_k_anonymity_analysis_multiple_fields( topic_id, subscription_id, [NUMERIC_FIELD, REPEATED_FIELD], + timeout=30, ) out, _ = capsys.readouterr() @@ -253,7 +257,7 @@ def test_k_anonymity_analysis_multiple_fields( assert "Class size:" in out -@pytest.mark.flaky +@pytest.mark.flaky(max_runs=3, min_passes=1) def test_l_diversity_analysis_single_field( topic_id, subscription_id, bigquery_project, capsys 
): @@ -266,6 +270,7 @@ def test_l_diversity_analysis_single_field( subscription_id, UNIQUE_FIELD, [NUMERIC_FIELD], + timeout=30, ) out, _ = capsys.readouterr() @@ -287,6 +292,7 @@ def test_l_diversity_analysis_multiple_field( subscription_id, UNIQUE_FIELD, [NUMERIC_FIELD, REPEATED_FIELD], + timeout=30, ) out, _ = capsys.readouterr() @@ -295,7 +301,7 @@ def test_l_diversity_analysis_multiple_field( assert "Sensitive value" in out -@pytest.mark.flaky +@pytest.mark.flaky(max_runs=3, min_passes=1) def test_k_map_estimate_analysis_single_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -308,6 +314,7 @@ def test_k_map_estimate_analysis_single_field( subscription_id, [NUMERIC_FIELD], ["AGE"], + timeout=30, ) out, _ = capsys.readouterr() @@ -329,6 +336,7 @@ def test_k_map_estimate_analysis_multiple_field( subscription_id, [NUMERIC_FIELD, STRING_BOOLEAN_FIELD], ["AGE", "GENDER"], + timeout=30, ) out, _ = capsys.readouterr() @@ -337,7 +345,7 @@ def test_k_map_estimate_analysis_multiple_field( assert "Values" in out -@pytest.mark.flaky +@pytest.mark.flaky(max_runs=3, min_passes=1) def test_k_map_estimate_analysis_quasi_ids_info_types_equal( topic_id, subscription_id, bigquery_project ): @@ -351,4 +359,5 @@ def test_k_map_estimate_analysis_quasi_ids_info_types_equal( subscription_id, [NUMERIC_FIELD, STRING_BOOLEAN_FIELD], ["AGE"], + timeout=30, ) From dd542adde11b86a108f6429e9f70c38d196cd163 Mon Sep 17 00:00:00 2001 From: Takashi Matsuo Date: Sat, 30 May 2020 09:23:05 +0000 Subject: [PATCH 2/5] more retries, comment --- dlp/risk_test.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/dlp/risk_test.py b/dlp/risk_test.py index 156c6ea7549..b75dabe34a9 100644 --- a/dlp/risk_test.py +++ b/dlp/risk_test.py @@ -38,6 +38,9 @@ # Create new custom topic/subscription +# We observe sometimes all the tests in this file fail. 
In a +# hypothesis where DLP service somehow loses the connection to the +# topic, now we use function scope for Pub/Sub fixtures. @pytest.fixture(scope="function") def topic_id(): # Creates a pubsub topic, and tears it down. @@ -160,7 +163,7 @@ def bigquery_project(): bigquery_client.delete_dataset(dataset_ref, delete_contents=True) -@pytest.mark.flaky(max_runs=3, min_passes=1) +@pytest.mark.flaky(max_runs=5, min_passes=1) def test_numerical_risk_analysis( topic_id, subscription_id, bigquery_project, capsys ): @@ -179,7 +182,7 @@ def test_numerical_risk_analysis( assert "Value Range:" in out -@pytest.mark.flaky(max_runs=3, min_passes=1) +@pytest.mark.flaky(max_runs=5, min_passes=1) def test_categorical_risk_analysis_on_string_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -198,7 +201,7 @@ def test_categorical_risk_analysis_on_string_field( assert "Most common value occurs" in out -@pytest.mark.flaky(max_runs=3, min_passes=1) +@pytest.mark.flaky(max_runs=5, min_passes=1) def test_categorical_risk_analysis_on_number_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -217,7 +220,7 @@ def test_categorical_risk_analysis_on_number_field( assert "Most common value occurs" in out -@pytest.mark.flaky(max_runs=3, min_passes=1) +@pytest.mark.flaky(max_runs=5, min_passes=1) def test_k_anonymity_analysis_single_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -237,7 +240,7 @@ def test_k_anonymity_analysis_single_field( assert "Class size:" in out -@pytest.mark.flaky(max_runs=3, min_passes=1) +@pytest.mark.flaky(max_runs=5, min_passes=1) def test_k_anonymity_analysis_multiple_fields( topic_id, subscription_id, bigquery_project, capsys ): @@ -257,7 +260,7 @@ def test_k_anonymity_analysis_multiple_fields( assert "Class size:" in out -@pytest.mark.flaky(max_runs=3, min_passes=1) +@pytest.mark.flaky(max_runs=5, min_passes=1) def test_l_diversity_analysis_single_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -279,7 
+282,7 @@ def test_l_diversity_analysis_single_field( assert "Sensitive value" in out -@pytest.mark.flaky(max_runs=3, min_passes=1) +@pytest.mark.flaky(max_runs=5, min_passes=1) def test_l_diversity_analysis_multiple_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -301,7 +304,7 @@ def test_l_diversity_analysis_multiple_field( assert "Sensitive value" in out -@pytest.mark.flaky(max_runs=3, min_passes=1) +@pytest.mark.flaky(max_runs=5, min_passes=1) def test_k_map_estimate_analysis_single_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -323,7 +326,7 @@ def test_k_map_estimate_analysis_single_field( assert "Values" in out -@pytest.mark.flaky(max_runs=3, min_passes=1) +@pytest.mark.flaky(max_runs=5, min_passes=1) def test_k_map_estimate_analysis_multiple_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -345,7 +348,7 @@ def test_k_map_estimate_analysis_multiple_field( assert "Values" in out -@pytest.mark.flaky(max_runs=3, min_passes=1) +@pytest.mark.flaky(max_runs=5, min_passes=1) def test_k_map_estimate_analysis_quasi_ids_info_types_equal( topic_id, subscription_id, bigquery_project ): From 3d92e61c5df185e09b28e1c60202d8c2ba44058a Mon Sep 17 00:00:00 2001 From: Takashi Matsuo Date: Sat, 30 May 2020 18:57:08 +0000 Subject: [PATCH 3/5] 30 seconds operation wait and 20 minutes retry delay --- dlp/risk_test.py | 58 +++++++++++++++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 20 deletions(-) diff --git a/dlp/risk_test.py b/dlp/risk_test.py index b75dabe34a9..eba8cccbcfc 100644 --- a/dlp/risk_test.py +++ b/dlp/risk_test.py @@ -36,6 +36,7 @@ BIGQUERY_TABLE_ID = "dlp_test_table" + UNIQUE_STRING BIGQUERY_HARMFUL_TABLE_ID = "harmful" + UNIQUE_STRING +TIMEOUT=30 # Create new custom topic/subscription # We observe sometimes all the tests in this file fail. 
In a @@ -163,7 +164,24 @@ def bigquery_project(): bigquery_client.delete_dataset(dataset_ref, delete_contents=True) -@pytest.mark.flaky(max_runs=5, min_passes=1) +def delay(): + # 20 mins of delay. This sounds like too long a delay, but we + # occasionally observe consequtive time block where operations are + # slow which leads to the test failures. These situations tend to + # get self healed in 20 minutes or so, so I'm trying this strategy. + # + # The worst case execution time becomes longer, but I think it + # will give us higher success rate. + # + # There are ten tests, and each tests has 30 seconds wait time + # and retried 1 times, so the worst case latency for the waits are: + # 210 minutes (3 hours 30 minutes). + # This might cause time out on Kokoro. + time.sleep(60*20) + return True + + +@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) def test_numerical_risk_analysis( topic_id, subscription_id, bigquery_project, capsys ): @@ -175,14 +193,14 @@ def test_numerical_risk_analysis( NUMERIC_FIELD, topic_id, subscription_id, - timeout=30, + timeout=TIMEOUT, ) out, _ = capsys.readouterr() assert "Value Range:" in out -@pytest.mark.flaky(max_runs=5, min_passes=1) +@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) def test_categorical_risk_analysis_on_string_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -194,14 +212,14 @@ def test_categorical_risk_analysis_on_string_field( UNIQUE_FIELD, topic_id, subscription_id, - timeout=30, + timeout=TIMEOUT, ) out, _ = capsys.readouterr() assert "Most common value occurs" in out -@pytest.mark.flaky(max_runs=5, min_passes=1) +@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) def test_categorical_risk_analysis_on_number_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -213,14 +231,14 @@ def test_categorical_risk_analysis_on_number_field( NUMERIC_FIELD, topic_id, subscription_id, - timeout=30, + timeout=TIMEOUT, ) out, _ = capsys.readouterr() 
assert "Most common value occurs" in out -@pytest.mark.flaky(max_runs=5, min_passes=1) +@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) def test_k_anonymity_analysis_single_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -232,7 +250,7 @@ def test_k_anonymity_analysis_single_field( topic_id, subscription_id, [NUMERIC_FIELD], - timeout=30, + timeout=TIMEOUT, ) out, _ = capsys.readouterr() @@ -240,7 +258,7 @@ def test_k_anonymity_analysis_single_field( assert "Class size:" in out -@pytest.mark.flaky(max_runs=5, min_passes=1) +@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) def test_k_anonymity_analysis_multiple_fields( topic_id, subscription_id, bigquery_project, capsys ): @@ -252,7 +270,7 @@ def test_k_anonymity_analysis_multiple_fields( topic_id, subscription_id, [NUMERIC_FIELD, REPEATED_FIELD], - timeout=30, + timeout=TIMEOUT, ) out, _ = capsys.readouterr() @@ -260,7 +278,7 @@ def test_k_anonymity_analysis_multiple_fields( assert "Class size:" in out -@pytest.mark.flaky(max_runs=5, min_passes=1) +@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) def test_l_diversity_analysis_single_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -273,7 +291,7 @@ def test_l_diversity_analysis_single_field( subscription_id, UNIQUE_FIELD, [NUMERIC_FIELD], - timeout=30, + timeout=TIMEOUT, ) out, _ = capsys.readouterr() @@ -282,7 +300,7 @@ def test_l_diversity_analysis_single_field( assert "Sensitive value" in out -@pytest.mark.flaky(max_runs=5, min_passes=1) +@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) def test_l_diversity_analysis_multiple_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -295,7 +313,7 @@ def test_l_diversity_analysis_multiple_field( subscription_id, UNIQUE_FIELD, [NUMERIC_FIELD, REPEATED_FIELD], - timeout=30, + timeout=TIMEOUT, ) out, _ = capsys.readouterr() @@ -304,7 +322,7 @@ def test_l_diversity_analysis_multiple_field( assert "Sensitive value" in 
out -@pytest.mark.flaky(max_runs=5, min_passes=1) +@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) def test_k_map_estimate_analysis_single_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -317,7 +335,7 @@ def test_k_map_estimate_analysis_single_field( subscription_id, [NUMERIC_FIELD], ["AGE"], - timeout=30, + timeout=TIMEOUT, ) out, _ = capsys.readouterr() @@ -326,7 +344,7 @@ def test_k_map_estimate_analysis_single_field( assert "Values" in out -@pytest.mark.flaky(max_runs=5, min_passes=1) +@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) def test_k_map_estimate_analysis_multiple_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -339,7 +357,7 @@ def test_k_map_estimate_analysis_multiple_field( subscription_id, [NUMERIC_FIELD, STRING_BOOLEAN_FIELD], ["AGE", "GENDER"], - timeout=30, + timeout=TIMEOUT, ) out, _ = capsys.readouterr() @@ -348,7 +366,7 @@ def test_k_map_estimate_analysis_multiple_field( assert "Values" in out -@pytest.mark.flaky(max_runs=5, min_passes=1) +@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) def test_k_map_estimate_analysis_quasi_ids_info_types_equal( topic_id, subscription_id, bigquery_project ): @@ -362,5 +380,5 @@ def test_k_map_estimate_analysis_quasi_ids_info_types_equal( subscription_id, [NUMERIC_FIELD, STRING_BOOLEAN_FIELD], ["AGE"], - timeout=30, + timeout=TIMEOUT, ) From 906ad3e54ac2bdce5b111c39d18ee427575f6c6e Mon Sep 17 00:00:00 2001 From: Takashi Matsuo Date: Sat, 30 May 2020 19:18:57 +0000 Subject: [PATCH 4/5] lint fix etc --- dlp/risk_test.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dlp/risk_test.py b/dlp/risk_test.py index eba8cccbcfc..e36c6cfa547 100644 --- a/dlp/risk_test.py +++ b/dlp/risk_test.py @@ -13,6 +13,7 @@ # limitations under the License. 
import os +import time import uuid import google.cloud.bigquery @@ -36,7 +37,8 @@ BIGQUERY_TABLE_ID = "dlp_test_table" + UNIQUE_STRING BIGQUERY_HARMFUL_TABLE_ID = "harmful" + UNIQUE_STRING -TIMEOUT=30 +TIMEOUT = 30 + # Create new custom topic/subscription # We observe sometimes all the tests in this file fail. In a @@ -164,7 +166,7 @@ def bigquery_project(): bigquery_client.delete_dataset(dataset_ref, delete_contents=True) -def delay(): +def delay(err, *args): # 20 mins of delay. This sounds like too long a delay, but we # occasionally observe consequtive time block where operations are # slow which leads to the test failures. These situations tend to From 42625757645985e68149aa60582dc9c11a997fa6 Mon Sep 17 00:00:00 2001 From: Takashi Matsuo Date: Sun, 31 May 2020 19:22:51 +0000 Subject: [PATCH 5/5] limit the max retry wait time --- dlp/conftest.py | 20 ++++++++++++++++++++ dlp/risk_test.py | 14 ++++++-------- 2 files changed, 26 insertions(+), 8 deletions(-) create mode 100644 dlp/conftest.py diff --git a/dlp/conftest.py b/dlp/conftest.py new file mode 100644 index 00000000000..362e5a2c271 --- /dev/null +++ b/dlp/conftest.py @@ -0,0 +1,20 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + + +# Used in risk_test.py to limit the maximum wait time before the flaky retries. 
+def pytest_configure(config): + pytest.MAX_FLAKY_WAIT = 3600 # maximum of an hour diff --git a/dlp/risk_test.py b/dlp/risk_test.py index e36c6cfa547..0164cf3b8c0 100644 --- a/dlp/risk_test.py +++ b/dlp/risk_test.py @@ -172,14 +172,12 @@ def delay(err, *args): # slow which leads to the test failures. These situations tend to # get self healed in 20 minutes or so, so I'm trying this strategy. # - # The worst case execution time becomes longer, but I think it - # will give us higher success rate. - # - # There are ten tests, and each tests has 30 seconds wait time - # and retried 1 times, so the worst case latency for the waits are: - # 210 minutes (3 hours 30 minutes). - # This might cause time out on Kokoro. - time.sleep(60*20) + # There are 10 tests, so we don't want the retry delay happening + # for all the tests. When we exhaust the MAX_FLAKY_WAIT, we retry + # the test immediately. + wait_time = min(pytest.MAX_FLAKY_WAIT, 60*20) + pytest.MAX_FLAKY_WAIT -= wait_time + time.sleep(wait_time) return True