Skip to content

Commit 23f7c4e

Browse files
authored
APMSP-1930 add retry logic for test setup requests to test agent (#1015)
add retry logic for test setup requests to test agent used in integration tests
1 parent e1678bf commit 23f7c4e

File tree

1 file changed

+112
-11
lines changed

1 file changed

+112
-11
lines changed

trace-utils/src/test_utils/datadog_test_agent.rs

Lines changed: 112 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
// Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/
22
// SPDX-License-Identifier: Apache-2.0
33

4+
use cargo_metadata::MetadataCommand;
5+
use ddcommon::hyper_migration::{self, Body};
6+
use http_body_util::BodyExt;
7+
use hyper::body::Incoming;
8+
use hyper::{Request, Response, Uri};
49
use std::borrow::Cow;
510
use std::collections::HashMap;
611
use std::path::Path;
712
use std::str::FromStr;
813
use std::time::Duration;
9-
10-
use cargo_metadata::MetadataCommand;
11-
use ddcommon::hyper_migration;
12-
use http_body_util::BodyExt;
13-
use hyper::Uri;
1414
use testcontainers::{
1515
core::{wait::HttpWaitStrategy, AccessMode, ContainerPort, Mount, WaitFor},
1616
runners::AsyncRunner,
@@ -282,11 +282,21 @@ impl DatadogTestAgent {
282282
///
283283
/// * `snapshot_token` - A string slice that holds the snapshot token.
284284
pub async fn assert_snapshot(&self, snapshot_token: &str) {
285-
let client = hyper_migration::new_default_client();
286285
let uri = self
287286
.get_uri_for_endpoint("test/session/snapshot", Some(snapshot_token))
288287
.await;
289-
let res = client.get(uri).await.expect("Request failed");
288+
289+
let req = Request::builder()
290+
.method("GET")
291+
.uri(uri)
292+
.body(Body::empty())
293+
.expect("Failed to create request");
294+
295+
let res = self
296+
.agent_request_with_retry(req, 5)
297+
.await
298+
.expect("request failed");
299+
290300
let status_code = res.status();
291301
let body_bytes = res
292302
.into_body()
@@ -329,10 +339,18 @@ impl DatadogTestAgent {
329339
/// }
330340
/// ```
331341
pub async fn get_sent_traces(&self) -> Vec<serde_json::Value> {
332-
let client = hyper_migration::new_default_client();
333342
let uri = self.get_uri_for_endpoint("test/traces", None).await;
334343

335-
let res = client.get(uri).await.expect("Request failed");
344+
let req = Request::builder()
345+
.method("GET")
346+
.uri(uri)
347+
.body(Body::empty())
348+
.expect("Failed to create request");
349+
350+
let res = self
351+
.agent_request_with_retry(req, 5)
352+
.await
353+
.expect("request failed");
336354

337355
let body_bytes = res
338356
.into_body()
@@ -383,7 +401,7 @@ impl DatadogTestAgent {
383401
session_token: &str,
384402
agent_sample_rates_by_service: Option<&str>,
385403
) {
386-
let client = hyper_migration::new_default_client();
404+
// let client = hyper_migration::new_default_client();
387405

388406
let mut query_params_map = HashMap::new();
389407
query_params_map.insert(SESSION_TEST_TOKEN_QUERY_PARAM_KEY, session_token);
@@ -395,7 +413,16 @@ impl DatadogTestAgent {
395413
.get_uri_for_endpoint_and_params(SESSION_START_ENDPOINT, query_params_map)
396414
.await;
397415

398-
let res = client.get(uri).await.expect("Request failed");
416+
let req = Request::builder()
417+
.method("GET")
418+
.uri(uri)
419+
.body(Body::empty())
420+
.expect("Failed to create request");
421+
422+
let res = self
423+
.agent_request_with_retry(req, 5)
424+
.await
425+
.expect("request failed");
399426

400427
assert_eq!(
401428
res.status(),
@@ -405,4 +432,78 @@ impl DatadogTestAgent {
405432
res.status()
406433
);
407434
}
435+
436+
/// Sends an HTTP request to the Datadog Test Agent with rudimentary retry logic.
437+
///
438+
/// In rare situations when tests are running on CI, the container running the test agent may
439+
/// reset the network connection even after ready states pass. Instead of adding arbitrary
440+
/// sleeps to these tests, we can just retry the request. This function should not be used for
441+
/// requests that are actually being tested, like sending payloads to the test agent. It should
442+
/// only be used for requests to setup the test. Examples of when you would use this
443+
/// function are for starting sessions or getting snapshot results.
444+
///
445+
/// # Arguments
446+
///
447+
/// * `req` - A `Request<Body>` representing the HTTP request to be sent.
448+
/// * `max_attempts` - An `i32` specifying the maximum number of request attempts to be made.
449+
///
450+
/// # Returns
451+
///
452+
/// * `Ok(Response<Incoming>)` - If the request succeeds. The status may or may not be
453+
/// successful.
454+
/// * `Err(anyhow::Error)` - If all retry attempts fail or an error occurs during the request.
455+
///
456+
/// ```
457+
async fn agent_request_with_retry(
458+
&self,
459+
req: Request<Body>,
460+
max_attempts: i32,
461+
) -> anyhow::Result<Response<Incoming>> {
462+
let mut attempts = 1;
463+
let mut delay_ms = 100;
464+
let (parts, body) = req.into_parts();
465+
let body_bytes = body
466+
.collect()
467+
.await
468+
.expect("Failed to collect body")
469+
.to_bytes();
470+
let mut last_response;
471+
472+
loop {
473+
let client = hyper_migration::new_default_client();
474+
let req = Request::from_parts(parts.clone(), Body::from_bytes(body_bytes.clone()));
475+
let res = client.request(req).await;
476+
477+
match res {
478+
Ok(response) => {
479+
if response.status().is_success() {
480+
return Ok(response);
481+
} else {
482+
println!(
483+
"Request failed with status code: {}. Request attempt {} of {}",
484+
response.status(),
485+
attempts,
486+
max_attempts
487+
);
488+
last_response = Ok(response);
489+
}
490+
}
491+
Err(e) => {
492+
println!(
493+
"Request failed with error: {}. Request attempt {} of {}",
494+
e, attempts, max_attempts
495+
);
496+
last_response = Err(e)
497+
}
498+
}
499+
500+
if attempts >= max_attempts {
501+
return Ok(last_response?);
502+
}
503+
504+
tokio::time::sleep(Duration::from_millis(delay_ms)).await;
505+
delay_ms *= 2;
506+
attempts += 1;
507+
}
508+
}
408509
}

0 commit comments

Comments
 (0)