From 3e89b8d76b9b82da79e002cd243c2971523f1ecb Mon Sep 17 00:00:00 2001 From: Stavros Kontopoulos Date: Mon, 1 Feb 2021 13:15:43 +0200 Subject: [PATCH 01/13] unregister views --- metrics/resource_view.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/metrics/resource_view.go b/metrics/resource_view.go index 7135af70aa..c6c5aff87e 100644 --- a/metrics/resource_view.go +++ b/metrics/resource_view.go @@ -82,9 +82,14 @@ func cleanup() { expiryCutoff := allMeters.clock.Now().Add(-1 * maxMeterExporterAge) allMeters.lock.Lock() defer allMeters.lock.Unlock() + resourceViews.lock.Lock() + defer resourceViews.lock.Unlock() for key, meter := range allMeters.meters { if key != "" && meter.t.Before(expiryCutoff) { flushGivenExporter(meter.e) + // make a copy of views to avoid data races + viewsCopy := copyViews(resourceViews.views) + meter.m.Unregister(viewsCopy...) delete(allMeters.meters, key) } } From 7c2c04d3468c6e3ec7078244b12185bc29986418 Mon Sep 17 00:00:00 2001 From: Stavros Kontopoulos Date: Mon, 1 Feb 2021 14:30:00 +0200 Subject: [PATCH 02/13] add a test --- metrics/resource_view_test.go | 53 +++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/metrics/resource_view_test.go b/metrics/resource_view_test.go index 8814447aa8..1b03b6d077 100644 --- a/metrics/resource_view_test.go +++ b/metrics/resource_view_test.go @@ -189,6 +189,59 @@ func TestAllMetersExpiration(t *testing.T) { // (123=9m, 456=evicted, 789=0m) } +func TestIfAllMeterResourcesAreRemoved(t *testing.T) { + allMeters.clock = clock.Clock(clock.NewFakeClock(time.Now())) + var fakeClock *clock.FakeClock = allMeters.clock.(*clock.FakeClock) + ClearMetersForTest() // t+0m + // Register many resources at once + for i := 1; i <= 1000; i++ { + res := resource.Resource{Labels: map[string]string{"foo": "bar"}} + res.Labels["id"] = string(i) + _, err := optionForResource(&res) + if err != nil { + t.Error("Should succeed getting option, instead got error ", err) + } + + } + allMeters.lock.Lock() + // make a copy to test against as allMeters should be cleaned up + copyAllMeters := make(map[string]*meterExporter) + for k, v := range allMeters.meters { + copyAllMeters[k] = v + } + for _, meter := range copyAllMeters { + for _, mView := range resourceViews.views { + v := meter.m.Find(mView.Name) + if v == nil { + t.Errorf("Got a view that should be registered") + } + } + } + allMeters.lock.Unlock() + // expire all views + fakeClock.Step(12 * time.Minute) // t+12m + time.Sleep(time.Second) // Wait a second on the wallclock, so that the cleanup thread has time to finish a loop + allMeters.lock.Lock() + // non-expiring defaultMeter should be available + if len(allMeters.meters) != 1 { + t.Errorf("len(allMeters)=%d, want: 1", len(allMeters.meters)) + } + allMeters.lock.Unlock() + resourceViews.lock.Lock() + for _, meter := range copyAllMeters { + for _, mView := range resourceViews.views { + v := meter.m.Find(mView.Name) + if v != nil { + t.Errorf("Got a view that should be unregistered") + } + if meter.e != nil { + t.Errorf("Got a meter exporter set") + } + } + } + resourceViews.lock.Unlock() +} + func TestResourceAsString(t *testing.T) { r1 := &resource.Resource{Type: "foobar", Labels: map[string]string{"k1": "v1", "k3": "v3", "k2": "v2"}} r2 := &resource.Resource{Type: "foobar", Labels: map[string]string{"k2": "v2", "k3": "v3", "k1": "v1"}} From 944b345c496db01e9ef31e9308c73408832e5f8a Mon Sep 17 00:00:00 2001 From: Stavros Kontopoulos Date: Mon, 1 Feb 2021 14:51:13 +0200 Subject: [PATCH 03/13] fix string issue --- metrics/resource_view_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metrics/resource_view_test.go b/metrics/resource_view_test.go index 1b03b6d077..d4c6459a65 100644 --- a/metrics/resource_view_test.go +++ b/metrics/resource_view_test.go @@ -196,7 +196,7 @@ func TestIfAllMeterResourcesAreRemoved(t *testing.T) { // Register many resources at once for i := 1; i <= 1000; i++ { res := resource.Resource{Labels: map[string]string{"foo": "bar"}} - res.Labels["id"] = string(i) + res.Labels["id"] = fmt.Sprintf("id:%d", i) _, err := optionForResource(&res) if err != nil { t.Error("Should succeed getting option, instead got error ", err) From 4a399c84b4423ad6987a279102fb385167214d1f Mon Sep 17 00:00:00 2001 From: Stavros Kontopoulos Date: Mon, 1 Feb 2021 14:52:07 +0200 Subject: [PATCH 04/13] fix --- metrics/resource_view_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metrics/resource_view_test.go b/metrics/resource_view_test.go index d4c6459a65..a577885427 100644 --- a/metrics/resource_view_test.go +++ b/metrics/resource_view_test.go @@ -196,7 +196,7 @@ func TestIfAllMeterResourcesAreRemoved(t *testing.T) { // Register many resources at once for i := 1; i <= 1000; i++ { res := resource.Resource{Labels: map[string]string{"foo": "bar"}} - res.Labels["id"] = fmt.Sprintf("id:%d", i) + res.Labels["id"] = fmt.Sprintf("%d", i) _, err := optionForResource(&res) if err != nil { t.Error("Should succeed getting option, instead got error ", err) From 320f55d2757b1c33ac8726562c02fde35040d649 Mon Sep 17 00:00:00 2001 From: Stavros Kontopoulos Date: Tue, 2 Feb 2021 00:07:22 +0200 Subject: [PATCH 05/13] fixes --- metrics/resource_view_test.go | 60 +++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 24 deletions(-) diff --git a/metrics/resource_view_test.go b/metrics/resource_view_test.go index a577885427..fc51cfe052 100644 --- a/metrics/resource_view_test.go +++ b/metrics/resource_view_test.go @@ -25,6 +25,7 @@ import ( "go.opencensus.io/stats" "go.opencensus.io/stats/view" "k8s.io/apimachinery/pkg/util/clock" + "k8s.io/apimachinery/pkg/util/wait" ) var ( @@ -170,12 +171,14 @@ func TestAllMetersExpiration(t *testing.T) { // Expire the second entry fakeClock.Step(9 * time.Minute) // t+12m - time.Sleep(time.Second) // Wait a second on the wallclock, so that the cleanup thread has time to finish a loop - allMeters.lock.Lock() + _ = wait.PollImmediate(time.Second, time.Minute, func() (bool, error) { + // Non-expiring defaultMeter should be available + return len(allMeters.meters) == 2, nil + }) + if len(allMeters.meters) != 2 { t.Errorf("len(allMeters)=%d, want: 2", len(allMeters.meters)) } - allMeters.lock.Unlock() // (123=9m, 456=10.5m) // non-expiring defaultMeter was just tested @@ -194,48 +197,57 @@ func TestIfAllMeterResourcesAreRemoved(t *testing.T) { var fakeClock *clock.FakeClock = allMeters.clock.(*clock.FakeClock) ClearMetersForTest() // t+0m // Register many resources at once - for i := 1; i <= 1000; i++ { + for i := 1; i <= 100; i++ { res := resource.Resource{Labels: map[string]string{"foo": "bar"}} - res.Labels["id"] = fmt.Sprintf("%d", i) - _, err := optionForResource(&res) - if err != nil { + res.Labels["id"] = fmt.Sprint(i) + if _, err := optionForResource(&res); err != nil { t.Error("Should succeed getting option, instead got error ", err) } + m := stats.Int64("testView_sum", "", stats.UnitDimensionless) + v := view.View{Name: fmt.Sprintf("testview-%d", i), Measure: m, Aggregation: view.Sum()} + err := RegisterResourceView(&v) + if err != nil { + t.Fatal("RegisterResourceView =", err) + } } allMeters.lock.Lock() - // make a copy to test against as allMeters should be cleaned up + // Make a copy to test against as allMeters should be cleaned up copyAllMeters := make(map[string]*meterExporter) for k, v := range allMeters.meters { copyAllMeters[k] = v } + allMeters.lock.Unlock() + resourceViews.lock.Lock() for _, meter := range copyAllMeters { for _, mView := range resourceViews.views { - v := meter.m.Find(mView.Name) - if v == nil { - t.Errorf("Got a view that should be registered") + want, got := mView, meter.m.Find(mView.Name) + if got == nil { + t.Errorf("View %s is not available", want.Name) + } else if want.Name != got.Name { + t.Errorf("Want %v, got %v", want.Name, got.Name) } } } - allMeters.lock.Unlock() - // expire all views + resourceViews.lock.Unlock() + + // Expire all meters and views fakeClock.Step(12 * time.Minute) // t+12m - time.Sleep(time.Second) // Wait a second on the wallclock, so that the cleanup thread has time to finish a loop - allMeters.lock.Lock() - // non-expiring defaultMeter should be available + + _ = wait.PollImmediate(time.Second, time.Minute, func() (bool, error) { + // Non-expiring defaultMeter should be available + return len(allMeters.meters) == 1, nil + }) + if len(allMeters.meters) != 1 { t.Errorf("len(allMeters)=%d, want: 1", len(allMeters.meters)) } - allMeters.lock.Unlock() resourceViews.lock.Lock() - for _, meter := range copyAllMeters { + for k, meter := range copyAllMeters { for _, mView := range resourceViews.views { - v := meter.m.Find(mView.Name) - if v != nil { - t.Errorf("Got a view that should be unregistered") - } - if meter.e != nil { - t.Errorf("Got a meter exporter set") + // Default meter should be skipped as views are not removed from it during cleanup + if k != "" && meter.m.Find(mView.Name) != nil { + t.Errorf("Got a view that should be unregistered: %s", mView.Name) } } } From 8023f0e5ceba13455017834a5a99d212c0ee7b4a Mon Sep 17 00:00:00 2001 From: Stavros Kontopoulos Date: Tue, 2 Feb 2021 00:30:54 +0200 Subject: [PATCH 06/13] fixe races in tests --- metrics/resource_view_test.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/metrics/resource_view_test.go b/metrics/resource_view_test.go index fc51cfe052..7d5c3bb716 100644 --- a/metrics/resource_view_test.go +++ b/metrics/resource_view_test.go @@ -173,6 +173,8 @@ func TestAllMetersExpiration(t *testing.T) { fakeClock.Step(9 * time.Minute) // t+12m _ = wait.PollImmediate(time.Second, time.Minute, func() (bool, error) { // Non-expiring defaultMeter should be available + allMeters.lock.Lock() + defer allMeters.lock.Unlock() return len(allMeters.meters) == 2, nil }) @@ -236,6 +238,8 @@ func TestIfAllMeterResourcesAreRemoved(t *testing.T) { _ = wait.PollImmediate(time.Second, time.Minute, func() (bool, error) { // Non-expiring defaultMeter should be available + allMeters.lock.Lock() + defer allMeters.lock.Unlock() return len(allMeters.meters) == 1, nil }) From 8b1987fde33fb2a65d5d03cdb8a64bf4c7ea04f1 Mon Sep 17 00:00:00 2001 From: Stavros Kontopoulos Date: Tue, 2 Feb 2021 11:55:37 +0200 Subject: [PATCH 07/13] fix exporter issue --- metrics/resource_view_test.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/metrics/resource_view_test.go b/metrics/resource_view_test.go index 7d5c3bb716..60445af897 100644 --- a/metrics/resource_view_test.go +++ b/metrics/resource_view_test.go @@ -75,6 +75,8 @@ type testExporter struct { id string } +func (testExporter) ExportView(viewData *view.Data) {} + func TestSetFactory(t *testing.T) { var oldFactory ResourceExporterFactory func() { @@ -171,7 +173,7 @@ func TestAllMetersExpiration(t *testing.T) { // Expire the second entry fakeClock.Step(9 * time.Minute) // t+12m - _ = wait.PollImmediate(time.Second, time.Minute, func() (bool, error) { + _ = wait.PollImmediate(100*time.Millisecond, 5*time.Second, func() (bool, error) { // Non-expiring defaultMeter should be available allMeters.lock.Lock() defer allMeters.lock.Unlock() @@ -235,8 +237,7 @@ func TestIfAllMeterResourcesAreRemoved(t *testing.T) { // Expire all meters and views fakeClock.Step(12 * time.Minute) // t+12m - - _ = wait.PollImmediate(time.Second, time.Minute, func() (bool, error) { + _ = wait.PollImmediate(100*time.Millisecond, 5*time.Second, func() (bool, error) { // Non-expiring defaultMeter should be available allMeters.lock.Lock() defer allMeters.lock.Unlock() From 7e05f8164003e1f6a4b6454113fe564c168dad2b Mon Sep 17 00:00:00 2001 From: Stavros Kontopoulos Date: Sun, 7 Feb 2021 15:54:28 +0200 Subject: [PATCH 08/13] stop meter in cleanup --- metrics/resource_view.go | 1 + metrics/resource_view_test.go | 10 ---------- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/metrics/resource_view.go b/metrics/resource_view.go index c6c5aff87e..cf9c95f8c9 100644 --- a/metrics/resource_view.go +++ b/metrics/resource_view.go @@ -91,6 +91,7 @@ func cleanup() { viewsCopy := copyViews(resourceViews.views) meter.m.Unregister(viewsCopy...) delete(allMeters.meters, key) + meter.m.Stop() } } } diff --git a/metrics/resource_view_test.go b/metrics/resource_view_test.go index 60445af897..95098be274 100644 --- a/metrics/resource_view_test.go +++ b/metrics/resource_view_test.go @@ -247,16 +247,6 @@ func TestIfAllMeterResourcesAreRemoved(t *testing.T) { if len(allMeters.meters) != 1 { t.Errorf("len(allMeters)=%d, want: 1", len(allMeters.meters)) } - resourceViews.lock.Lock() - for k, meter := range copyAllMeters { - for _, mView := range resourceViews.views { - // Default meter should be skipped as views are not removed from it during cleanup - if k != "" && meter.m.Find(mView.Name) != nil { - t.Errorf("Got a view that should be unregistered: %s", mView.Name) - } - } - } - resourceViews.lock.Unlock() } func TestResourceAsString(t *testing.T) { From e6dd2de19774d837732524fafa3e0f6bf960a79d Mon Sep 17 00:00:00 2001 From: Stavros Kontopoulos Date: Tue, 9 Feb 2021 16:58:31 +0200 Subject: [PATCH 09/13] fixes --- metrics/e2e_test.go | 2 +- metrics/resource_view.go | 2 +- metrics/resource_view_test.go | 11 +++++++++-- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/metrics/e2e_test.go b/metrics/e2e_test.go index b8ec21d324..6a2401dc8e 100644 --- a/metrics/e2e_test.go +++ b/metrics/e2e_test.go @@ -195,7 +195,7 @@ func TestMetricsExport(t *testing.T) { return err } // Wait for the webserver to actually start serving metrics - return wait.PollImmediate(10*time.Millisecond, 10*time.Second, func() (bool, error) { + return wait.PollImmediate(10*time.Millisecond, 20*time.Second, func() (bool, error) { resp, err := http.Get(fmt.Sprintf("http://localhost:%d/metrics", prometheusPort)) return err == nil && resp.StatusCode == http.StatusOK, nil }) diff --git a/metrics/resource_view.go b/metrics/resource_view.go index cf9c95f8c9..5f31615688 100644 --- a/metrics/resource_view.go +++ b/metrics/resource_view.go @@ -145,7 +145,7 @@ func RegisterResourceView(views ...*view.View) error { return nil } -// UnregisterResourceView is similar to view.Unregiste(), except that it will +// UnregisterResourceView is similar to view.Unregister(), except that it will // unregister the view across all Resources tracked byt he system, rather than // simply the default view. func UnregisterResourceView(views ...*view.View) { diff --git a/metrics/resource_view_test.go b/metrics/resource_view_test.go index 95098be274..3e70151ced 100644 --- a/metrics/resource_view_test.go +++ b/metrics/resource_view_test.go @@ -180,9 +180,11 @@ func TestAllMetersExpiration(t *testing.T) { return len(allMeters.meters) == 2, nil }) + allMeters.lock.Lock() if len(allMeters.meters) != 2 { t.Errorf("len(allMeters)=%d, want: 2", len(allMeters.meters)) } + allMeters.lock.Unlock() // (123=9m, 456=10.5m) // non-expiring defaultMeter was just tested @@ -221,7 +223,7 @@ func TestIfAllMeterResourcesAreRemoved(t *testing.T) { for k, v := range allMeters.meters { copyAllMeters[k] = v } - allMeters.lock.Unlock() + resourceViews.lock.Lock() for _, meter := range copyAllMeters { for _, mView := range resourceViews.views { @@ -234,8 +236,9 @@ func TestIfAllMeterResourcesAreRemoved(t *testing.T) { } } resourceViews.lock.Unlock() - + allMeters.lock.Unlock() // Expire all meters and views + // We need to unlock before we move the clock ahead in time fakeClock.Step(12 * time.Minute) // t+12m _ = wait.PollImmediate(100*time.Millisecond, 5*time.Second, func() (bool, error) { // Non-expiring defaultMeter should be available @@ -244,6 +247,10 @@ func TestIfAllMeterResourcesAreRemoved(t *testing.T) { return len(allMeters.meters) == 1, nil }) + allMeters.lock.Lock() + defer allMeters.lock.Unlock() + resourceViews.lock.Lock() + defer resourceViews.lock.Unlock() if len(allMeters.meters) != 1 { t.Errorf("len(allMeters)=%d, want: 1", len(allMeters.meters)) } From 4a2c053ba107386d818c63655735bbbcfca5429a Mon Sep 17 00:00:00 2001 From: Stavros Kontopoulos Date: Tue, 9 Feb 2021 17:00:54 +0200 Subject: [PATCH 10/13] typo --- metrics/resource_view.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metrics/resource_view.go b/metrics/resource_view.go index 5f31615688..236fd588ba 100644 --- a/metrics/resource_view.go +++ b/metrics/resource_view.go @@ -87,7 +87,7 @@ func cleanup() { for key, meter := range allMeters.meters { if key != "" && meter.t.Before(expiryCutoff) { flushGivenExporter(meter.e) - // make a copy of views to avoid data races + // Make a copy of views to avoid data races viewsCopy := copyViews(resourceViews.views) meter.m.Unregister(viewsCopy...) delete(allMeters.meters, key) From 2059b067a90dda7446204a4dbe8da9b1299c2105 Mon Sep 17 00:00:00 2001 From: Stavros Kontopoulos Date: Tue, 9 Feb 2021 21:06:01 +0200 Subject: [PATCH 11/13] revert timeout change --- metrics/e2e_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metrics/e2e_test.go b/metrics/e2e_test.go index 6a2401dc8e..b8ec21d324 100644 --- a/metrics/e2e_test.go +++ b/metrics/e2e_test.go @@ -195,7 +195,7 @@ func TestMetricsExport(t *testing.T) { return err } // Wait for the webserver to actually start serving metrics - return wait.PollImmediate(10*time.Millisecond, 20*time.Second, func() (bool, error) { + return wait.PollImmediate(10*time.Millisecond, 10*time.Second, func() (bool, error) { resp, err := http.Get(fmt.Sprintf("http://localhost:%d/metrics", prometheusPort)) return err == nil && resp.StatusCode == http.StatusOK, nil }) From 120826147423821761287f9684a00f4ac7037eed Mon Sep 17 00:00:00 2001 From: Stavros Kontopoulos Date: Tue, 9 Feb 2021 21:27:31 +0200 Subject: [PATCH 12/13] fixes --- metrics/resource_view_test.go | 38 +++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/metrics/resource_view_test.go b/metrics/resource_view_test.go index 3e70151ced..fc8073ef86 100644 --- a/metrics/resource_view_test.go +++ b/metrics/resource_view_test.go @@ -217,26 +217,30 @@ func TestIfAllMeterResourcesAreRemoved(t *testing.T) { t.Fatal("RegisterResourceView =", err) } } - allMeters.lock.Lock() - // Make a copy to test against as allMeters should be cleaned up - copyAllMeters := make(map[string]*meterExporter) - for k, v := range allMeters.meters { - copyAllMeters[k] = v - } - resourceViews.lock.Lock() - for _, meter := range copyAllMeters { - for _, mView := range resourceViews.views { - want, got := mView, meter.m.Find(mView.Name) - if got == nil { - t.Errorf("View %s is not available", want.Name) - } else if want.Name != got.Name { - t.Errorf("Want %v, got %v", want.Name, got.Name) + func() { + allMeters.lock.Lock() + resourceViews.lock.Lock() + defer allMeters.lock.Unlock() + defer resourceViews.lock.Unlock() + // Make a copy to test against as allMeters should be cleaned up + copyAllMeters := make(map[string]*meterExporter, len(allMeters.meters)) + for k, v := range allMeters.meters { + copyAllMeters[k] = v + } + + for _, meter := range copyAllMeters { + for _, mView := range resourceViews.views { + want, got := mView, meter.m.Find(mView.Name) + if got == nil { + t.Errorf("View %s is not available", want.Name) + } else if want.Name != got.Name { + t.Errorf("Want %v, got %v", want.Name, got.Name) + } } } - } - resourceViews.lock.Unlock() - allMeters.lock.Unlock() + }() + // Expire all meters and views // We need to unlock before we move the clock ahead in time fakeClock.Step(12 * time.Minute) // t+12m From 5000a743ba367f383fce9bcb28aa6217951a19a3 Mon Sep 17 00:00:00 2001 From: Stavros Kontopoulos Date: Wed, 10 Feb 2021 15:30:06 +0200 Subject: [PATCH 13/13] change msg --- metrics/resource_view_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metrics/resource_view_test.go b/metrics/resource_view_test.go index fc8073ef86..37766239ad 100644 --- a/metrics/resource_view_test.go +++ b/metrics/resource_view_test.go @@ -174,7 +174,7 @@ func TestAllMetersExpiration(t *testing.T) { // Expire the second entry fakeClock.Step(9 * time.Minute) // t+12m _ = wait.PollImmediate(100*time.Millisecond, 5*time.Second, func() (bool, error) { - // Non-expiring defaultMeter should be available + // Non-expiring defaultMeter should be available along with the non-expired entry allMeters.lock.Lock() defer allMeters.lock.Unlock() return len(allMeters.meters) == 2, nil