Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 26 additions & 11 deletions xds/internal/xdsclient/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,19 @@ func (s) TestServerFailureMetrics_BeforeResponseRecv(t *testing.T) {
if err != nil {
t.Fatalf("net.Listen() failed: %v", err)
}
mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{Listener: l})
lis := testutils.NewRestartableListener(l)
streamOpened := make(chan struct{}, 1)
mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{
Listener: lis,
OnStreamOpen: func(context.Context, int64, string) error {
select {
case streamOpened <- struct{}{}:
default:
}
return nil
},
})

nodeID := uuid.New().String()

bootstrapContents, err := bootstrap.NewContentsForTesting(bootstrap.ConfigOptionsForTesting{
Expand Down Expand Up @@ -196,16 +208,21 @@ func (s) TestServerFailureMetrics_BeforeResponseRecv(t *testing.T) {

// Watch for the listener on the above management server.
xdsresource.WatchListener(client, listenerResourceName, noopListenerWatcher{})

// Close the listener and ensure that the ADS stream breaks. This should
// cause a server failure count to emit eventually.
l.Close()
// Verify that an ADS stream is opened and an LDS request with the above
// resource name is sent.
select {
case <-streamOpened:
case <-ctx.Done():
t.Fatal("Timeout when waiting for ADS stream to close")
default:
t.Fatal("Timeout when waiting for ADS stream to open")
}

// Close the listener and ensure that the ADS stream breaks. This should
// cause a server failure count to emit eventually.
lis.Stop()

// Restart to prevent the attempt to create a new ADS stream after back off.
lis.Restart()
Comment on lines +223 to +224
Copy link
Contributor

@arjan-bal arjan-bal Apr 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This code is still racy because the backoff could have completed before the listener is restarted. There is an option that can be used to set a really long backoff duration instead.

// StreamBackoffAfterFailure is the backoff function used to determine the
// backoff duration after stream failures.
// If unspecified, uses the default value used in non-test code.
StreamBackoffAfterFailure func(int) time.Duration

Copy link
Contributor Author

@purnesh42H purnesh42H Apr 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, in this particular test case it won't happen because we are setting the watch expiry timeout which is way shorter than the back off. So, watch will just expire before backoff anyway but before that we would received the stream error.


mdWant := stats.MetricsData{
Handle: xdsClientServerFailureMetric.Descriptor(),
IntIncr: 1,
Expand Down Expand Up @@ -294,10 +311,8 @@ func (s) TestServerFailureMetrics_AfterResponseRecv(t *testing.T) {
// Close the listener and ensure that the ADS stream breaks. This should
// cause a server failure count to emit eventually.
lis.Stop()
select {
case <-ctx.Done():
t.Fatal("Timeout when waiting for ADS stream to close")
default:
if ctx.Err() != nil {
t.Fatalf("Timeout when waiting for ADS stream to close")
}
// Restart to prevent the attempt to create a new ADS stream after back off.
lis.Restart()
Expand Down