From e708900f7c20bbc18cea2696f7d70694aa8ce633 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Sun, 28 Sep 2025 00:46:06 +0200 Subject: [PATCH 1/9] Respect rate limit values in response headers Co-Authored-By: Claude --- src/utils/requests.jl | 245 ++++++++++++++++++++++-- test/retries.jl | 419 ++++++++++++++++++++++++++++++++++++++++++ test/runtests.jl | 1 + 3 files changed, 648 insertions(+), 17 deletions(-) create mode 100644 test/retries.jl diff --git a/src/utils/requests.jl b/src/utils/requests.jl index 02c33f5..6c3923a 100644 --- a/src/utils/requests.jl +++ b/src/utils/requests.jl @@ -58,28 +58,230 @@ api_uri(api::GitHubAPI, path) = error("URI retrieval not implemented for this AP # GitHub REST Methods # ####################### -function github_request(api::GitHubAPI, request_method, endpoint; +function safe_tryparse(args...) + try + return tryparse(args...) + catch + nothing + end +end + +""" + github_retry_decision(method::String, resp::Union{HTTP.Response, Nothing}, ex::Union{Exception, Nothing}, exponential_delay::Float64; verbose::Bool=true) -> (should_retry::Bool, sleep_seconds::Float64) + +Analyzes a GitHub API response/exception to determine if a request should be retried and how long to wait. +Uses HTTP.jl's retry logic as a foundation, then adds GitHub-specific rate limiting handling. +This function does NOT perform any sleeping - it only returns the decision and timing information. +Logs retry decisions with detailed rate limit information when retries occur (if verbose=true). + +# Arguments +- `method`: HTTP method string (e.g., "GET", "POST") +- `resp`: HTTP response object (if a response was received), or `nothing` +- `ex`: Exception that occurred (if any), or `nothing` +- `exponential_delay`: The delay from ExponentialBackOff iterator +- `verbose`: Whether to log retry decisions (default: true) + +# Returns +A tuple `(should_retry, sleep_seconds)` where: +- `should_retry`: `true` if the request should be retried, `false` otherwise +- `sleep_seconds`: Number of seconds to sleep before retry (0 if no sleep needed) + +# Retry Logic +1. First uses HTTP.jl's standard retry logic (`isrecoverable` + `isidempotent`) +2. Then adds GitHub-specific rate limiting: + - **Primary rate limit**: `x-ratelimit-remaining: 0` → wait until `x-ratelimit-reset` time + - **Secondary rate limit**: Has `retry-after` header OR error message indicates secondary → + - If `retry-after` present: use that delay + - If `x-ratelimit-remaining: 0`: wait until reset time + - Otherwise: wait at least 1 minute, then use exponential backoff +""" +function github_retry_decision(method::String, resp::Union{HTTP.Response, Nothing}, ex::Union{Exception, Nothing}, exponential_delay::Float64; verbose::Bool=true) + # If we have a response, process it first (takes precedence over exceptions) + if resp !== nothing + status = resp.status + + # Don't retry successful responses + if status < 400 + return (false, 0.0) + end + else + # No response - check if we have a recoverable exception + if ex !== nothing + # If there's an exception, check if it's recoverable and if the method is idempotent + if HTTP.RetryRequest.isrecoverable(ex) && HTTP.RetryRequest.isidempotent(method) + verbose && @info "GitHub API exception, retrying in $(round(exponential_delay, digits=1))s" method=method exception=ex + return (true, exponential_delay) + end + end + # No response and no retryable exception + return (false, 0.0) + end + + # At this point we have a response with status >= 400 + + # Handle GitHub rate limiting (403, 429) with special logic + if status in (403, 429) + # Get all rate limit headers + limit = HTTP.header(resp, "x-ratelimit-limit", "") + remaining = HTTP.header(resp, "x-ratelimit-remaining", "") + used = HTTP.header(resp, "x-ratelimit-used", "") + reset_time = HTTP.header(resp, "x-ratelimit-reset", "") + resource = HTTP.header(resp, "x-ratelimit-resource", "") + retry_after = HTTP.header(resp, "retry-after", "") + + # Check response body for secondary rate limit indicators + # Note: `String` takes ownership / removes the body, so we make a copy + body = String(copy(resp.body)) + is_secondary_rate_limit = occursin("secondary rate limit", lowercase(body)) || !isempty(retry_after) + if is_secondary_rate_limit + # Secondary rate limit handling + # If retry-after header is present, respect it + delay_seconds = safe_tryparse(Float64, retry_after) + if delay_seconds !== nothing + delay_seconds = parse(Float64, retry_after) + verbose && @info "GitHub API secondary rate limit hit, retrying in $(round(delay_seconds, digits=1))s" method=method status=status limit=limit remaining=remaining used=used reset=reset_time resource=resource retry_after=retry_after + return (true, delay_seconds) + end + + # If x-ratelimit-remaining is 0, wait until reset time + reset_timestamp = safe_tryparse(Float64, reset_time) + if remaining == "0" && reset_timestamp !== nothing + current_time = time() + if reset_timestamp > current_time + delay_seconds = reset_timestamp - current_time + 1.0 + verbose && @info "GitHub API secondary rate limit hit, retrying in $(round(delay_seconds, digits=1))s" method=method status=status limit=limit remaining=remaining used=used reset=reset_time resource=resource retry_after=retry_after + return (true, delay_seconds) + end + end + + # Otherwise, wait at least 1 minute, then use exponential backoff + delay_seconds = max(60.0, exponential_delay) + verbose && @info "GitHub API secondary rate limit hit, retrying in $(round(delay_seconds, digits=1))s" method=method status=status limit=limit remaining=remaining used=used reset=reset_time resource=resource retry_after=retry_after + return (true, delay_seconds) + else + # Primary rate limit handling + # If x-ratelimit-remaining is 0, wait until reset time + reset_timestamp = safe_tryparse(Float64, reset_time) + if remaining == "0" && reset_timestamp !== nothing + current_time = time() + if reset_timestamp > current_time + delay_seconds = reset_timestamp - current_time + 1.0 + verbose && @info "GitHub API primary rate limit hit, retrying in $(round(delay_seconds, digits=1))s" method=method status=status limit=limit remaining=remaining used=used reset=reset_time resource=resource + return (true, delay_seconds) + end + end + + # For other primary rate limit cases, use exponential backoff + verbose && @info "GitHub API primary rate limit hit, retrying in $(round(exponential_delay, digits=1))s" method=method status=status limit=limit remaining=remaining used=used reset=reset_time resource=resource + return (true, exponential_delay) + end + end + + # For other HTTP errors, check if they're retryable according to HTTP.jl + if HTTP.RetryRequest.retryable(status) && HTTP.RetryRequest.isidempotent(method) + verbose && @info "GitHub API HTTP error, retrying in $(round(exponential_delay, digits=1))s" method=method status=status + return (true, exponential_delay) + end + + # For client errors (400, 401, 404, 422), don't retry + return (false, 0.0) +end + +""" + with_retries(f; method::AbstractString="GET", max_retries::Int=5, verbose::Bool=true, sleep_fn=sleep) -> Any + +Generic retry wrapper that executes function `f()` with GitHub-specific retry logic. + +# Arguments +- `f`: Function to execute (should return HTTP.Response or throw exception) +- `method`: HTTP method for retry decision logic (default: "GET") +- `max_retries`: Maximum number of retry attempts (default: 5) +- `verbose`: Whether to log retry decisions (default: true) +- `sleep_fn`: Function to call for sleeping between retries (default: sleep). For testing, can be replaced with a custom function. + +# Returns +Returns the result of `f()` if successful, or re-throws the final exception if all retries fail. + +# Example +```julia +result = with_retries(method="GET", verbose=false) do + HTTP.get("https://api.github.com/user", headers) +end +``` +""" +function with_retries(f; method::AbstractString="GET", max_retries::Int=5, verbose::Bool=true, sleep_fn=sleep) + # Create ExponentialBackOff iterator + backoff = Base.ExponentialBackOff(n = max_retries+1) + + # Try the function, with retries on failure + for (attempt, exponential_delay) in enumerate(backoff) + last_try = attempt > max_retries + local r, ex + try + r = f() + ex = nothing + if last_try + return r + end + catch e + r = nothing + ex = e + if last_try + rethrow() + end + end + + # Check if we should retry based on this attempt + should_retry, sleep_seconds = github_retry_decision(method, r, ex, exponential_delay; verbose=verbose) + + if !should_retry + # Don't retry - return response or throw exception + if ex !== nothing + throw(ex) + else + return r + end + end + + # Sleep before the next attempt (at the end of the loop) + if sleep_seconds > 0 + sleep_fn(sleep_seconds) + end + end +end + +function github_request(api::GitHubAPI, request_method::String, endpoint; auth = AnonymousAuth(), handle_error = true, - headers = Dict(), params = Dict(), allowredirects = true) + headers = Dict(), params = Dict(), allowredirects = true, + max_retries = 5) authenticate_headers!(headers, auth) params = github2json(params) api_endpoint = api_uri(api, endpoint) _headers = convert(Dict{String, String}, headers) !haskey(_headers, "User-Agent") && (_headers["User-Agent"] = "GitHub-jl") - if request_method == HTTP.get - r = request_method(URIs.URI(api_endpoint, query = params), _headers, redirect = allowredirects, status_exception = false, idle_timeout=20) - else - r = request_method(string(api_endpoint), _headers, JSON.json(params), redirect = allowredirects, status_exception = false, idle_timeout=20) + + # Use with_retries helper to handle the retry logic + r = with_retries(method = request_method, max_retries = max_retries) do + if request_method == "GET" + return HTTP.request(request_method, URIs.URI(api_endpoint, query = params), _headers; + redirect = allowredirects, status_exception = false, + idle_timeout = 20, retry = false) + else + return HTTP.request(request_method, string(api_endpoint), _headers, JSON.json(params); + redirect = allowredirects, status_exception = false, + idle_timeout = 20, retry = false) + end end + handle_error && handle_response_error(r) return r end -gh_get(api::GitHubAPI, endpoint = ""; options...) = github_request(api, HTTP.get, endpoint; options...) -gh_post(api::GitHubAPI, endpoint = ""; options...) = github_request(api, HTTP.post, endpoint; options...) -gh_put(api::GitHubAPI, endpoint = ""; options...) = github_request(api, HTTP.put, endpoint; options...) -gh_delete(api::GitHubAPI, endpoint = ""; options...) = github_request(api, HTTP.delete, endpoint; options...) -gh_patch(api::GitHubAPI, endpoint = ""; options...) = github_request(api, HTTP.patch, endpoint; options...) +gh_get(api::GitHubAPI, endpoint = ""; options...) = github_request(api, "GET", endpoint; options...) +gh_post(api::GitHubAPI, endpoint = ""; options...) = github_request(api, "POST", endpoint; options...) +gh_put(api::GitHubAPI, endpoint = ""; options...) = github_request(api, "PUT", endpoint; options...) +gh_delete(api::GitHubAPI, endpoint = ""; options...) = github_request(api, "DELETE", endpoint; options...) +gh_patch(api::GitHubAPI, endpoint = ""; options...) = github_request(api, "PATCH", endpoint; options...) gh_get_json(api::GitHubAPI, endpoint = ""; options...) = JSON.parse(HTTP.payload(gh_get(api, endpoint; options...), String)) gh_post_json(api::GitHubAPI, endpoint = ""; options...) = JSON.parse(HTTP.payload(gh_post(api, endpoint; options...), String)) @@ -113,15 +315,24 @@ end extract_page_url(link) = match(r"<.*?>", link).match[2:end-1] function github_paged_get(api, endpoint; page_limit = Inf, start_page = "", handle_error = true, - auth = AnonymousAuth(), headers = Dict(), params = Dict(), options...) + auth = AnonymousAuth(), headers = Dict(), params = Dict(), max_retries = 5, options...) authenticate_headers!(headers, auth) _headers = convert(Dict{String, String}, headers) !haskey(_headers, "User-Agent") && (_headers["User-Agent"] = "GitHub-jl") + + # Helper function to make a single request with retries + function make_request_with_retries(url, headers) + # Use with_retries helper to handle the retry logic + return with_retries(method = "GET", max_retries = max_retries) do + HTTP.request("GET", url, headers; status_exception = false, retry = false) + end + end + if isempty(start_page) - r = gh_get(api, endpoint; handle_error = handle_error, headers = _headers, params = params, auth=auth, options...) + r = gh_get(api, endpoint; handle_error = handle_error, headers = _headers, params = params, auth=auth, max_retries=max_retries, options...) else @assert isempty(params) "`start_page` kwarg is incompatible with `params` kwarg" - r = HTTP.get(start_page, headers = _headers) + r = make_request_with_retries(start_page, _headers) end results = HTTP.Response[r] page_data = Dict{String, String}() @@ -131,7 +342,7 @@ function github_paged_get(api, endpoint; page_limit = Inf, start_page = "", hand links = get_page_links(r) next_index = find_page_link(links, "next") next_index == 0 && break - r = HTTP.get(extract_page_url(links[next_index]), headers = _headers) + r = make_request_with_retries(extract_page_url(links[next_index]), _headers) handle_error && handle_response_error(r) push!(results, r) page_count += 1 @@ -184,7 +395,7 @@ function handle_response_error(r::HTTP.Response) errors = get(data, "errors", "") catch end - error("Error found in GitHub reponse:\n", + error("Error found in GitHub response:\n", "\tStatus Code: $(r.status)\n", ((isempty(message) && isempty(errors)) ? ("\tBody: $body",) : @@ -210,4 +421,4 @@ function check_disallowed_name_pattern(str::AbstractString) end return str -end \ No newline at end of file +end diff --git a/test/retries.jl b/test/retries.jl new file mode 100644 index 0000000..5afd512 --- /dev/null +++ b/test/retries.jl @@ -0,0 +1,419 @@ +using Test +using HTTP +using GitHub + +@testset "github_retry_decision" begin + + @testset "HTTP.jl recoverable exceptions" begin + # Test with a potentially recoverable exception (let HTTP.jl decide) + # We'll just test that our function handles exceptions without crashing + network_ex = Base.IOError("connection reset", 104) + should_retry, sleep_seconds = GitHub.github_retry_decision("GET", nothing, network_ex, 2.0; verbose=false) + # The actual retry decision depends on HTTP.jl's isrecoverable and isidempotent functions + @test typeof(should_retry) == Bool + @test sleep_seconds >= 0.0 + + # Test with non-recoverable exception + non_recoverable_ex = ArgumentError("invalid argument") + should_retry, sleep_seconds = GitHub.github_retry_decision("GET", nothing, non_recoverable_ex, 2.0; verbose=false) + @test should_retry == false + @test sleep_seconds == 0.0 + end + + @testset "No response and no exception" begin + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",nothing, nothing, 2.0; verbose=false) + @test should_retry == false + @test sleep_seconds == 0.0 + end + + @testset "Successful responses" begin + for status in [200, 201, 204] + resp = HTTP.Response(status) + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 2.0; verbose=false) + @test should_retry == false + @test sleep_seconds == 0.0 + end + end + + @testset "Primary rate limit - x-ratelimit-remaining = 0" begin + + # Test with future reset time - use fixed timestamp to avoid race conditions + future_time = "1900000000" # Fixed timestamp in the future (year 2030) + resp = HTTP.Response(403, [ + "x-ratelimit-remaining" => "0", + "x-ratelimit-reset" => future_time + ]) + + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 2.0; verbose=false) + @test should_retry == true + @test sleep_seconds > 100000 # Should be a large delay since reset time is far in future + + # Test with past reset time (should use exponential backoff) + past_time = "1000000000" # Fixed timestamp in the past (year 2001) + resp2 = HTTP.Response(403, [ + "x-ratelimit-remaining" => "0", + "x-ratelimit-reset" => past_time + ]) + + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp2, nothing, 5.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 5.0 # Should use the exponential delay + end + + @testset "Secondary rate limit - retry-after header" begin + + # Test secondary rate limit with retry-after + body = """{"message": "You have been rate limited due to a secondary rate limit", "documentation_url": "..."}""" + resp = HTTP.Response(429, ["retry-after" => "30"]; body = Vector{UInt8}(body)) + + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 2.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 30.0 # Should use retry-after value + + # Test with just retry-after header (no body message) + resp2 = HTTP.Response(429, ["retry-after" => "15"]) + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp2, nothing, 2.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 15.0 + end + + @testset "Secondary rate limit - message in body" begin + + # Test secondary rate limit detected from body message + body = """{"message": "You have exceeded a secondary rate limit. Please wait one minute before trying again."}""" + resp = HTTP.Response(429; body = Vector{UInt8}(body)) + + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 2.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 60.0 # Should wait at least 1 minute + + # Test with exponential delay greater than 60 seconds + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 120.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 120.0 # Should use the larger exponential delay + end + + @testset "Secondary rate limit - x-ratelimit-remaining = 0" begin + + # Test secondary rate limit with reset time - use fixed timestamp to avoid race conditions + future_time = "1900000000" # Fixed timestamp in the future (year 2030) + body = """{"message": "secondary rate limit exceeded"}""" + resp = HTTP.Response(403, [ + "x-ratelimit-remaining" => "0", + "x-ratelimit-reset" => future_time + ]; body = Vector{UInt8}(body)) + + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 5.0; verbose=false) + @test should_retry == true + @test sleep_seconds > 100000 # Should be a large delay since reset time is far in future + end + + @testset "Primary rate limit - exponential backoff" begin + + # Primary rate limit without specific headers + resp = HTTP.Response(429, []) + + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 4.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 4.0 # Should use exponential delay + + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 8.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 8.0 + end + + @testset "Other HTTP errors" begin + + for status in [408, 409, 500, 502, 503, 504, 599] + resp = HTTP.Response(status, []) + + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 3.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 3.0 # Should use exponential delay + end + end + + @testset "Non-retryable client errors" begin + + for status in [400, 401, 404, 422] + resp = HTTP.Response(status, []) + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 1.0; verbose=false) + @test should_retry == false + @test sleep_seconds == 0.0 + end + end + + @testset "Invalid header values" begin + + # Test with invalid retry-after header (should fall back to secondary rate limit minimum) + resp1 = HTTP.Response(429, ["retry-after" => "invalid"]) + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp1, nothing, 2.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 60.0 # Falls back to secondary rate limit minimum (1 minute) + + # Test with invalid reset time (should fall back to exponential backoff) + resp2 = HTTP.Response(403, [ + "x-ratelimit-remaining" => "0", + "x-ratelimit-reset" => "invalid" + ]) + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp2, nothing, 3.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 3.0 # Falls back to exponential backoff + end + + @testset "Rate limit header precedence" begin + + # retry-after should take precedence over x-ratelimit-reset + future_time = "1900000000" # Fixed timestamp (doesn't matter since retry-after takes precedence) + resp = HTTP.Response(429, [ + "retry-after" => "5", + "x-ratelimit-remaining" => "0", + "x-ratelimit-reset" => future_time + ]) + + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 10.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 5.0 # Should use retry-after, not reset time + end + + @testset "Rate limit remaining non-zero" begin + + # Should use exponential backoff when x-ratelimit-remaining is not "0" + future_time = "1900000000" # Fixed timestamp (doesn't matter since remaining != "0") + resp = HTTP.Response(403, [ + "x-ratelimit-remaining" => "5", + "x-ratelimit-reset" => future_time + ]) + + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 3.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 3.0 # Should use exponential backoff, not reset time + end + + @testset "Exception with successful response" begin + + # If we have both an exception and a response, the response should take precedence + resp = HTTP.Response(200) + ex = Base.IOError("some error", 0) + + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, ex, 2.0; verbose=false) + @test should_retry == false # Success response should not retry + @test sleep_seconds == 0.0 + end + + @testset "Request method considerations" begin + # Test with different HTTP methods to ensure retryable logic works + + # Network exception with different methods + network_ex = Base.IOError("connection refused", 111) + + # Test that both work without crashing (actual retry depends on HTTP.jl internals) + should_retry, sleep_seconds = GitHub.github_retry_decision("GET", nothing, network_ex, 1.0; verbose=false) + @test typeof(should_retry) == Bool + @test sleep_seconds >= 0.0 + + # POST behavior depends on HTTP.jl's isidempotent() function (non-idempotent methods typically don't retry) + should_retry, sleep_seconds = GitHub.github_retry_decision("POST", nothing, network_ex, 1.0; verbose=false) + @test typeof(should_retry) == Bool + @test sleep_seconds >= 0.0 + end +end + + +@testset "with_retries function tests" begin + @testset "Success after retries" begin + call_count = Ref(0) + sleep_calls = Float64[] + + # Custom sleep function that records sleep durations + function test_sleep(seconds) + push!(sleep_calls, seconds) + end + + result = GitHub.with_retries(method="GET", max_retries=2, verbose=false, sleep_fn=test_sleep) do + call_count[] += 1 + if call_count[] < 3 + # Return a rate limit response for first 2 attempts + return HTTP.Response(429, ["retry-after" => "1"]) + else + # Success on 3rd attempt + return HTTP.Response(200) + end + end + + @test result.status == 200 + @test call_count[] ==3 + @test length(sleep_calls) == 2 # Should have slept twice + @test all(s -> s >= 1.0, sleep_calls) # Should respect retry-after + end + + @testset "Exception handling" begin + call_count = Ref(0) + sleep_calls = Float64[] + + function test_sleep(seconds) + push!(sleep_calls, seconds) + end + + # Test with recoverable exception (for GET method) + result = GitHub.with_retries(method="GET", max_retries=2, verbose=false, sleep_fn=test_sleep) do + call_count[] += 1 + if call_count[] < 3 + throw(Base.IOError("connection refused", 111)) + else + return HTTP.Response(200) + end + end + + @test result.status == 200 + @test call_count[] ==3 + @test length(sleep_calls) == 2 + end + + @testset "Non-retryable exceptions" begin + # Test that ArgumentError is not retried + @test_throws ArgumentError GitHub.with_retries(method="GET", verbose=false) do + throw(ArgumentError("invalid argument")) + end + end + + @testset "Max retries exhausted" begin + call_count = Ref(0) + sleep_calls = Float64[] + + function test_sleep(seconds) + push!(sleep_calls, seconds) + end + + # Test exhausting retries with rate limit responses + result = GitHub.with_retries(method="GET", max_retries=2, verbose=false, sleep_fn=test_sleep) do + call_count[] += 1 + return HTTP.Response(429, ["retry-after" => "0.1"]) # Always return rate limit + end + + @test result.status == 429 # Should return the final failed response + @test call_count[] ==3 # Initial + 2 retries + @test length(sleep_calls) == 2 + end + + @testset "Max retries exhausted with exception" begin + call_count = Ref(0) + + # Test exhausting retries with exceptions + @test_throws Base.IOError GitHub.with_retries(method="GET", max_retries=1, verbose=false, sleep_fn=x->nothing) do + call_count[] += 1 + throw(Base.IOError("persistent error", 104)) + end + + @test call_count[] ==2 # Initial + 1 retry + end + + @testset "Non-idempotent methods" begin + call_count = Ref(0) + + # POST requests should not retry on exceptions (non-idempotent) + @test_throws Base.IOError GitHub.with_retries(method="POST", verbose=false) do + call_count[] += 1 + throw(Base.IOError("connection refused", 111)) + end + + @test call_count[] ==1 # Should not retry + end + + @testset "GitHub rate limit handling" begin + call_count = Ref(0) + sleep_calls = Float64[] + + function test_sleep(seconds) + push!(sleep_calls, seconds) + end + + # Test primary rate limit with reset time + current_time = time() + reset_time = string(Int(round(current_time)) + 500000000) # 500000000 seconds from now + + result = GitHub.with_retries(method="GET", max_retries=1, verbose=false, sleep_fn=test_sleep) do + call_count[] += 1 + if call_count[] == 1 + return HTTP.Response(403, [ + "x-ratelimit-remaining" => "0", + "x-ratelimit-reset" => reset_time + ]) + else + return HTTP.Response(200) + end + end + + @test result.status == 200 + @test call_count[] ==2 + @test length(sleep_calls) == 1 + @test sleep_calls[1] >= 5.0 # Should wait at least until reset time + end + + @testset "Secondary rate limit with retry-after" begin + call_count = Ref(0) + sleep_calls = Float64[] + + function test_sleep(seconds) + push!(sleep_calls, seconds) + end + + body = """{"message": "You have exceeded a secondary rate limit."}""" + + result = GitHub.with_retries(method="GET", max_retries=1, verbose=false, sleep_fn=test_sleep) do + call_count[] += 1 + if call_count[] == 1 + return HTTP.Response(429, ["retry-after" => "3"]; body = Vector{UInt8}(body)) + else + return HTTP.Response(200) + end + end + + @test result.status == 200 + @test call_count[] == 2 + @test length(sleep_calls) == 1 + @test sleep_calls[1] == 3.0 # Should respect retry-after exactly + end + + @testset "Non-retryable HTTP errors" begin + call_count = Ref(0) + + # 404 should not be retried + result = GitHub.with_retries(method="GET", verbose=false) do + call_count[] += 1 + return HTTP.Response(404) + end + + @test result.status == 404 + @test call_count[] ==1 # Should not retry + end + + @testset "Zero max_retries" begin + call_count = Ref(0) + + # With max_retries=0, should only try once + result = GitHub.with_retries(method="GET", max_retries=0, verbose=false) do + call_count[] += 1 + return HTTP.Response(429) # Rate limit response + end + + @test result.status == 429 + @test call_count[] ==1 # Should not retry at all + end + + @testset "Sleep function not called on success" begin + sleep_called = Ref(false) + + function test_sleep(seconds) + sleep_called[] = true + end + + result = GitHub.with_retries(method="GET", verbose=false, sleep_fn=test_sleep) do + return HTTP.Response(200) + end + + @test result.status == 200 + @test !sleep_called[] + end + +end diff --git a/test/runtests.jl b/test/runtests.jl index 9e45211..f1c1814 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -7,6 +7,7 @@ include("ghtype_tests.jl") include("event_tests.jl") include("read_only_api_tests.jl") include("auth_tests.jl") +include("retries.jl") @testset "SSH keygen" begin pubkey, privkey = GitHub.genkeys(keycomment="GitHub.jl") From 195dd74f38efe614a983b06f60a9d106afab3cb0 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Sun, 28 Sep 2025 00:47:11 +0200 Subject: [PATCH 2/9] add note --- src/utils/requests.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/utils/requests.jl b/src/utils/requests.jl index 6c3923a..8aeb83c 100644 --- a/src/utils/requests.jl +++ b/src/utils/requests.jl @@ -94,6 +94,8 @@ A tuple `(should_retry, sleep_seconds)` where: - If `retry-after` present: use that delay - If `x-ratelimit-remaining: 0`: wait until reset time - Otherwise: wait at least 1 minute, then use exponential backoff + +This follows the [documentation from GitHub](https://docs.github.com/en/rest/using-the-rest-api/rate-limits-for-the-rest-api?apiVersion=2022-11-28#exceeding-the-rate-limit) as of 2025. """ function github_retry_decision(method::String, resp::Union{HTTP.Response, Nothing}, ex::Union{Exception, Nothing}, exponential_delay::Float64; verbose::Bool=true) # If we have a response, process it first (takes precedence over exceptions) From c83ee210fa60667e19bd0cd97804aecb0b8a0d1b Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Sun, 28 Sep 2025 00:52:35 +0200 Subject: [PATCH 3/9] rm useless comments --- src/utils/requests.jl | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/utils/requests.jl b/src/utils/requests.jl index 8aeb83c..00480da 100644 --- a/src/utils/requests.jl +++ b/src/utils/requests.jl @@ -185,7 +185,6 @@ function github_retry_decision(method::String, resp::Union{HTTP.Response, Nothin return (true, exponential_delay) end - # For client errors (400, 401, 404, 422), don't retry return (false, 0.0) end @@ -212,10 +211,8 @@ end ``` """ function with_retries(f; method::AbstractString="GET", max_retries::Int=5, verbose::Bool=true, sleep_fn=sleep) - # Create ExponentialBackOff iterator backoff = Base.ExponentialBackOff(n = max_retries+1) - # Try the function, with retries on failure for (attempt, exponential_delay) in enumerate(backoff) last_try = attempt > max_retries local r, ex @@ -237,7 +234,6 @@ function with_retries(f; method::AbstractString="GET", max_retries::Int=5, verbo should_retry, sleep_seconds = github_retry_decision(method, r, ex, exponential_delay; verbose=verbose) if !should_retry - # Don't retry - return response or throw exception if ex !== nothing throw(ex) else @@ -245,7 +241,6 @@ function with_retries(f; method::AbstractString="GET", max_retries::Int=5, verbo end end - # Sleep before the next attempt (at the end of the loop) if sleep_seconds > 0 sleep_fn(sleep_seconds) end @@ -262,7 +257,6 @@ function github_request(api::GitHubAPI, request_method::String, endpoint; _headers = convert(Dict{String, String}, headers) !haskey(_headers, "User-Agent") && (_headers["User-Agent"] = "GitHub-jl") - # Use with_retries helper to handle the retry logic r = with_retries(method = request_method, max_retries = max_retries) do if request_method == "GET" return HTTP.request(request_method, URIs.URI(api_endpoint, query = params), _headers; @@ -322,9 +316,8 @@ function github_paged_get(api, endpoint; page_limit = Inf, start_page = "", hand _headers = convert(Dict{String, String}, headers) !haskey(_headers, "User-Agent") && (_headers["User-Agent"] = "GitHub-jl") - # Helper function to make a single request with retries + # Helper function to make a get request with retries function make_request_with_retries(url, headers) - # Use with_retries helper to handle the retry logic return with_retries(method = "GET", max_retries = max_retries) do HTTP.request("GET", url, headers; status_exception = false, retry = false) end From 5d59b004c602be451f6a95994403db29af6186fc Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Sun, 28 Sep 2025 01:18:44 +0200 Subject: [PATCH 4/9] fix logic so we don't misidentify access-forbidden 403s as rate-limits --- src/utils/requests.jl | 108 +++++++++++++++++++----------------------- test/retries.jl | 2 +- 2 files changed, 51 insertions(+), 59 deletions(-) diff --git a/src/utils/requests.jl b/src/utils/requests.jl index 00480da..46032d9 100644 --- a/src/utils/requests.jl +++ b/src/utils/requests.jl @@ -120,72 +120,64 @@ function github_retry_decision(method::String, resp::Union{HTTP.Response, Nothin end # At this point we have a response with status >= 400 + # First let us see if we want to retry it. - # Handle GitHub rate limiting (403, 429) with special logic - if status in (403, 429) - # Get all rate limit headers - limit = HTTP.header(resp, "x-ratelimit-limit", "") - remaining = HTTP.header(resp, "x-ratelimit-remaining", "") - used = HTTP.header(resp, "x-ratelimit-used", "") - reset_time = HTTP.header(resp, "x-ratelimit-reset", "") - resource = HTTP.header(resp, "x-ratelimit-resource", "") - retry_after = HTTP.header(resp, "retry-after", "") - - # Check response body for secondary rate limit indicators - # Note: `String` takes ownership / removes the body, so we make a copy - body = String(copy(resp.body)) - is_secondary_rate_limit = occursin("secondary rate limit", lowercase(body)) || !isempty(retry_after) - if is_secondary_rate_limit - # Secondary rate limit handling - # If retry-after header is present, respect it - delay_seconds = safe_tryparse(Float64, retry_after) - if delay_seconds !== nothing - delay_seconds = parse(Float64, retry_after) - verbose && @info "GitHub API secondary rate limit hit, retrying in $(round(delay_seconds, digits=1))s" method=method status=status limit=limit remaining=remaining used=used reset=reset_time resource=resource retry_after=retry_after - return (true, delay_seconds) - end + # Note: `String` takes ownership / removes the body, so we make a copy + body = String(copy(resp.body)) + is_primary_rate_limit = occursin("primary rate limit", lowercase(body)) && status in (403, 429) + is_secondary_rate_limit = occursin("secondary rate limit", lowercase(body)) && status in (403, 429) - # If x-ratelimit-remaining is 0, wait until reset time - reset_timestamp = safe_tryparse(Float64, reset_time) - if remaining == "0" && reset_timestamp !== nothing - current_time = time() - if reset_timestamp > current_time - delay_seconds = reset_timestamp - current_time + 1.0 - verbose && @info "GitHub API secondary rate limit hit, retrying in $(round(delay_seconds, digits=1))s" method=method status=status limit=limit remaining=remaining used=used reset=reset_time resource=resource retry_after=retry_after - return (true, delay_seconds) - end - end + do_retry = HTTP.RetryRequest.isidempotent(method) && (is_primary_rate_limit || is_secondary_rate_limit || HTTP.RetryRequest.retryable(status)) - # Otherwise, wait at least 1 minute, then use exponential backoff - delay_seconds = max(60.0, exponential_delay) - verbose && @info "GitHub API secondary rate limit hit, retrying in $(round(delay_seconds, digits=1))s" method=method status=status limit=limit remaining=remaining used=used reset=reset_time resource=resource retry_after=retry_after - return (true, delay_seconds) - else - # Primary rate limit handling - # If x-ratelimit-remaining is 0, wait until reset time - reset_timestamp = safe_tryparse(Float64, reset_time) - if remaining == "0" && reset_timestamp !== nothing - current_time = time() - if reset_timestamp > current_time - delay_seconds = reset_timestamp - current_time + 1.0 - verbose && @info "GitHub API primary rate limit hit, retrying in $(round(delay_seconds, digits=1))s" method=method status=status limit=limit remaining=remaining used=used reset=reset_time resource=resource - return (true, delay_seconds) - end - end + if !do_retry + return (false, 0.0) + end - # For other primary rate limit cases, use exponential backoff - verbose && @info "GitHub API primary rate limit hit, retrying in $(round(exponential_delay, digits=1))s" method=method status=status limit=limit remaining=remaining used=used reset=reset_time resource=resource - return (true, exponential_delay) - end + # Now we know we want to retry. We need to decide how long to wait. + + # Get all rate limit headers + limit = HTTP.header(resp, "x-ratelimit-limit", "") + remaining = HTTP.header(resp, "x-ratelimit-remaining", "") + used = HTTP.header(resp, "x-ratelimit-used", "") + reset_time = HTTP.header(resp, "x-ratelimit-reset", "") + resource = HTTP.header(resp, "x-ratelimit-resource", "") + retry_after = HTTP.header(resp, "retry-after", "") + + msg = if is_primary_rate_limit + "GitHub API primary rate limit reached" + elseif is_secondary_rate_limit + "GitHub API secondary rate limit reached" + else + "GitHub API returned $status" + end + + # If retry-after header is present, respect it + delay_seconds = safe_tryparse(Float64, retry_after) + if delay_seconds !== nothing + delay_seconds = parse(Float64, retry_after) + verbose && @info "$msg, retrying in $(round(delay_seconds, digits=1))s" method=method status=status limit=limit remaining=remaining used=used reset=reset_time resource=resource retry_after=retry_after + return (true, delay_seconds) end - # For other HTTP errors, check if they're retryable according to HTTP.jl - if HTTP.RetryRequest.retryable(status) && HTTP.RetryRequest.isidempotent(method) - verbose && @info "GitHub API HTTP error, retrying in $(round(exponential_delay, digits=1))s" method=method status=status - return (true, exponential_delay) + # If x-ratelimit-remaining is 0, wait until reset time + reset_timestamp = safe_tryparse(Float64, reset_time) + if remaining == "0" && reset_timestamp !== nothing + current_time = time() + if reset_timestamp > current_time + delay_seconds = reset_timestamp - current_time + 1.0 + verbose && @info "$msg, retrying in $(round(delay_seconds, digits=1))s" method=method status=status limit=limit remaining=remaining used=used reset=reset_time resource=resource retry_after=retry_after + return (true, delay_seconds) + end end - return (false, 0.0) + # If secondary rate limit hit without headers to guide us, + # make sure we wait at least a minute + delay_seconds = is_secondary_rate_limit ? max(60.0, exponential_delay) : exponential_delay + + # Fall back to exponential backoff + verbose && @info "$msg, retrying in $(round(delay_seconds, digits=1))s" method=method status=status + + return (true, delay_seconds) end """ diff --git a/test/retries.jl b/test/retries.jl index 5afd512..bb7485f 100644 --- a/test/retries.jl +++ b/test/retries.jl @@ -146,7 +146,7 @@ using GitHub @testset "Invalid header values" begin # Test with invalid retry-after header (should fall back to secondary rate limit minimum) - resp1 = HTTP.Response(429, ["retry-after" => "invalid"]) + resp1 = HTTP.Response(429, ["retry-after" => "invalid"], Vector{UInt8}("secondary rate limit")) should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp1, nothing, 2.0; verbose=false) @test should_retry == true @test sleep_seconds == 60.0 # Falls back to secondary rate limit minimum (1 minute) From ed8d470a895102c0bfe1461ba14ef09a3ad8e6f3 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Sun, 28 Sep 2025 01:30:29 +0200 Subject: [PATCH 5/9] pass through `verbose` and add docs --- README.md | 5 +++++ src/utils/requests.jl | 10 +++++----- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index a75cb1e..2db7476 100644 --- a/README.md +++ b/README.md @@ -69,6 +69,11 @@ You can inspect which fields are available for a type `G<:GitHubType` by calling GitHub.jl implements a bunch of methods that make REST requests to GitHub's API. The below sections list these methods (note that a return type of `Tuple{Vector{T}, Dict}` means the result is [paginated](#pagination)). +These methods all accept keyword arguments which control how the request is made to github: + +- `max_retries::Int=5`: how many retries to attempt in requesting the resources. Retries are only made for idempotent requests ("GET", "HEAD", "OPTIONS", "TRACE", "PUT", "DELETE") and delays respect GitHub [rate limit headers](https://docs.github.com/en/rest/using-the-rest-api/rate-limits-for-the-rest-api?apiVersion=2022-11-28#checking-the-status-of-your-rate-limit). +- `verbose::Bool=true`: whether or not to log retries as Info level logs + #### Users and Organizations | method | return type | documentation | diff --git a/src/utils/requests.jl b/src/utils/requests.jl index 46032d9..7abd022 100644 --- a/src/utils/requests.jl +++ b/src/utils/requests.jl @@ -242,14 +242,14 @@ end function github_request(api::GitHubAPI, request_method::String, endpoint; auth = AnonymousAuth(), handle_error = true, headers = Dict(), params = Dict(), allowredirects = true, - max_retries = 5) + max_retries = 5, verbose = true) authenticate_headers!(headers, auth) params = github2json(params) api_endpoint = api_uri(api, endpoint) _headers = convert(Dict{String, String}, headers) !haskey(_headers, "User-Agent") && (_headers["User-Agent"] = "GitHub-jl") - r = with_retries(method = request_method, max_retries = max_retries) do + r = with_retries(; method = request_method, max_retries, verbose) do if request_method == "GET" return HTTP.request(request_method, URIs.URI(api_endpoint, query = params), _headers; redirect = allowredirects, status_exception = false, @@ -303,20 +303,20 @@ end extract_page_url(link) = match(r"<.*?>", link).match[2:end-1] function github_paged_get(api, endpoint; page_limit = Inf, start_page = "", handle_error = true, - auth = AnonymousAuth(), headers = Dict(), params = Dict(), max_retries = 5, options...) + auth = AnonymousAuth(), headers = Dict(), params = Dict(), max_retries = 5, verbose = true, options...) authenticate_headers!(headers, auth) _headers = convert(Dict{String, String}, headers) !haskey(_headers, "User-Agent") && (_headers["User-Agent"] = "GitHub-jl") # Helper function to make a get request with retries function make_request_with_retries(url, headers) - return with_retries(method = "GET", max_retries = max_retries) do + return with_retries(; method = "GET", max_retries, verbose) do HTTP.request("GET", url, headers; status_exception = false, retry = false) end end if isempty(start_page) - r = gh_get(api, endpoint; handle_error = handle_error, headers = _headers, params = params, auth=auth, max_retries=max_retries, options...) + r = gh_get(api, endpoint; handle_error, headers = _headers, params, auth, max_retries, verbose, options...) else @assert isempty(params) "`start_page` kwarg is incompatible with `params` kwarg" r = make_request_with_retries(start_page, _headers) From 76bc5064924b900c0ead752ebe14b2f553f7b6d9 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Sun, 28 Sep 2025 01:36:15 +0200 Subject: [PATCH 6/9] don't retry access errors --- src/utils/requests.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/utils/requests.jl b/src/utils/requests.jl index 7abd022..5c73539 100644 --- a/src/utils/requests.jl +++ b/src/utils/requests.jl @@ -127,7 +127,11 @@ function github_retry_decision(method::String, resp::Union{HTTP.Response, Nothin is_primary_rate_limit = occursin("primary rate limit", lowercase(body)) && status in (403, 429) is_secondary_rate_limit = occursin("secondary rate limit", lowercase(body)) && status in (403, 429) - do_retry = HTTP.RetryRequest.isidempotent(method) && (is_primary_rate_limit || is_secondary_rate_limit || HTTP.RetryRequest.retryable(status)) + # `other_retry` is `HTTP.RetryRequest.retryable(status)` minus 403, + # since if it's not a rate-limit, we don't want to retry 403s. + other_retry = status in (408, 409, 429, 500, 502, 503, 504, 599) + + do_retry = HTTP.RetryRequest.isidempotent(method) && (is_primary_rate_limit || is_secondary_rate_limit || other_retry) if !do_retry return (false, 0.0) From 416b21e370b366a8a33d6d8cd751ff730a9ef02a Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Sun, 28 Sep 2025 18:50:57 +0200 Subject: [PATCH 7/9] add max_sleep_seconds, fix tests --- README.md | 1 + src/GitHub.jl | 2 +- src/utils/requests.jl | 26 +++++++++++------- test/retries.jl | 61 ++++++++++++++++++++++--------------------- 4 files changed, 50 insertions(+), 40 deletions(-) diff --git a/README.md b/README.md index 2db7476..d26de5b 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,7 @@ These methods all accept keyword arguments which control how the request is made - `max_retries::Int=5`: how many retries to attempt in requesting the resources. Retries are only made for idempotent requests ("GET", "HEAD", "OPTIONS", "TRACE", "PUT", "DELETE") and delays respect GitHub [rate limit headers](https://docs.github.com/en/rest/using-the-rest-api/rate-limits-for-the-rest-api?apiVersion=2022-11-28#checking-the-status-of-your-rate-limit). - `verbose::Bool=true`: whether or not to log retries as Info level logs +- `max_sleep_seconds::Real=60*20`: if GitHub.jl intends to sleep for longer than `max_sleep_seconds` before retrying, e.g. due to rate limit headers from GitHub, throws an `RetryDelayException` instead. #### Users and Organizations diff --git a/src/GitHub.jl b/src/GitHub.jl index dd6e24c..d4bfcb3 100644 --- a/src/GitHub.jl +++ b/src/GitHub.jl @@ -44,7 +44,7 @@ export # auth.jl authenticate export # requests.jl - rate_limit + rate_limit, RetryDelayException ################################## # Owners (organizations + users) # diff --git a/src/utils/requests.jl b/src/utils/requests.jl index 5c73539..22ef536 100644 --- a/src/utils/requests.jl +++ b/src/utils/requests.jl @@ -184,8 +184,13 @@ function github_retry_decision(method::String, resp::Union{HTTP.Response, Nothin return (true, delay_seconds) end +struct RetryDelayException <: Exception + msg::String +end +Base.showerror(io::IO, e::RetryDelayException) = print(io, e.msg) + """ - with_retries(f; method::AbstractString="GET", max_retries::Int=5, verbose::Bool=true, sleep_fn=sleep) -> Any + with_retries(f; method::AbstractString="GET", max_retries::Int=5, verbose::Bool=true, sleep_fn=sleep, max_sleep_seconds::Real = 20*60) Generic retry wrapper that executes function `f()` with GitHub-specific retry logic. @@ -195,6 +200,7 @@ Generic retry wrapper that executes function `f()` with GitHub-specific retry lo - `max_retries`: Maximum number of retry attempts (default: 5) - `verbose`: Whether to log retry decisions (default: true) - `sleep_fn`: Function to call for sleeping between retries (default: sleep). For testing, can be replaced with a custom function. +- `max_sleep_seconds::Real`: maximum number of seconds to sleep when delaying before retrying. If the intended retry delay exceeds `max_sleep_seconds` an exception is thrown instead. This parameter defaults to 20*60 (20 minutes). # Returns Returns the result of `f()` if successful, or re-throws the final exception if all retries fail. @@ -206,7 +212,7 @@ result = with_retries(method="GET", verbose=false) do end ``` """ -function with_retries(f; method::AbstractString="GET", max_retries::Int=5, verbose::Bool=true, sleep_fn=sleep) +function with_retries(f; method::AbstractString="GET", max_retries::Int=5, verbose::Bool=true, sleep_fn=sleep, max_sleep_seconds::Real = 60*20) backoff = Base.ExponentialBackOff(n = max_retries+1) for (attempt, exponential_delay) in enumerate(backoff) @@ -227,7 +233,7 @@ function with_retries(f; method::AbstractString="GET", max_retries::Int=5, verbo end # Check if we should retry based on this attempt - should_retry, sleep_seconds = github_retry_decision(method, r, ex, exponential_delay; verbose=verbose) + should_retry, sleep_seconds = github_retry_decision(method, r, ex, exponential_delay; verbose) if !should_retry if ex !== nothing @@ -236,7 +242,9 @@ function with_retries(f; method::AbstractString="GET", max_retries::Int=5, verbo return r end end - + if sleep_seconds > max_sleep_seconds + throw(RetryDelayException("Retry delay $(sleep_seconds) exceeds configured maximum ($(max_sleep_seconds) seconds)")) + end if sleep_seconds > 0 sleep_fn(sleep_seconds) end @@ -246,14 +254,14 @@ end function github_request(api::GitHubAPI, request_method::String, endpoint; auth = AnonymousAuth(), handle_error = true, headers = Dict(), params = Dict(), allowredirects = true, - max_retries = 5, verbose = true) + max_retries = 5, verbose = true, max_sleep_seconds = 20*60) authenticate_headers!(headers, auth) params = github2json(params) api_endpoint = api_uri(api, endpoint) _headers = convert(Dict{String, String}, headers) !haskey(_headers, "User-Agent") && (_headers["User-Agent"] = "GitHub-jl") - r = with_retries(; method = request_method, max_retries, verbose) do + r = with_retries(; method = request_method, max_retries, verbose, max_sleep_seconds) do if request_method == "GET" return HTTP.request(request_method, URIs.URI(api_endpoint, query = params), _headers; redirect = allowredirects, status_exception = false, @@ -307,20 +315,20 @@ end extract_page_url(link) = match(r"<.*?>", link).match[2:end-1] function github_paged_get(api, endpoint; page_limit = Inf, start_page = "", handle_error = true, - auth = AnonymousAuth(), headers = Dict(), params = Dict(), max_retries = 5, verbose = true, options...) + auth = AnonymousAuth(), headers = Dict(), params = Dict(), max_retries = 5, verbose = true, max_sleep_seconds = 20*60, options...) authenticate_headers!(headers, auth) _headers = convert(Dict{String, String}, headers) !haskey(_headers, "User-Agent") && (_headers["User-Agent"] = "GitHub-jl") # Helper function to make a get request with retries function make_request_with_retries(url, headers) - return with_retries(; method = "GET", max_retries, verbose) do + return with_retries(; method = "GET", max_retries, verbose, max_sleep_seconds) do HTTP.request("GET", url, headers; status_exception = false, retry = false) end end if isempty(start_page) - r = gh_get(api, endpoint; handle_error, headers = _headers, params, auth, max_retries, verbose, options...) + r = gh_get(api, endpoint; handle_error, headers = _headers, params, auth, max_retries, verbose, max_sleep_seconds, options...) else @assert isempty(params) "`start_page` kwarg is incompatible with `params` kwarg" r = make_request_with_retries(start_page, _headers) diff --git a/test/retries.jl b/test/retries.jl index bb7485f..e6a3a43 100644 --- a/test/retries.jl +++ b/test/retries.jl @@ -2,6 +2,9 @@ using Test using HTTP using GitHub +primary_rate_limit_body = Vector{UInt8}("primary rate limit") +secondary_rate_limit_body = Vector{UInt8}("secondary rate limit") + @testset "github_retry_decision" begin @testset "HTTP.jl recoverable exceptions" begin @@ -42,9 +45,9 @@ using GitHub resp = HTTP.Response(403, [ "x-ratelimit-remaining" => "0", "x-ratelimit-reset" => future_time - ]) + ], primary_rate_limit_body) - should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 2.0; verbose=false) + should_retry, sleep_seconds = GitHub.github_retry_decision("GET", resp, nothing, 2.0; verbose=false) @test should_retry == true @test sleep_seconds > 100000 # Should be a large delay since reset time is far in future @@ -53,9 +56,9 @@ using GitHub resp2 = HTTP.Response(403, [ "x-ratelimit-remaining" => "0", "x-ratelimit-reset" => past_time - ]) + ], primary_rate_limit_body) - should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp2, nothing, 5.0; verbose=false) + should_retry, sleep_seconds = GitHub.github_retry_decision("GET", resp2, nothing, 5.0; verbose=false) @test should_retry == true @test sleep_seconds == 5.0 # Should use the exponential delay end @@ -63,8 +66,7 @@ using GitHub @testset "Secondary rate limit - retry-after header" begin # Test secondary rate limit with retry-after - body = """{"message": "You have been rate limited due to a secondary rate limit", "documentation_url": "..."}""" - resp = HTTP.Response(429, ["retry-after" => "30"]; body = Vector{UInt8}(body)) + resp = HTTP.Response(429, ["retry-after" => "30"]; body = secondary_rate_limit_body) should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 2.0; verbose=false) @test should_retry == true @@ -77,11 +79,8 @@ using GitHub @test sleep_seconds == 15.0 end - @testset "Secondary rate limit - message in body" begin - - # Test secondary rate limit detected from body message - body = """{"message": "You have exceeded a secondary rate limit. Please wait one minute before trying again."}""" - resp = HTTP.Response(429; body = Vector{UInt8}(body)) + @testset "Secondary rate limit - no headers" begin + resp = HTTP.Response(429; body = secondary_rate_limit_body) should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 2.0; verbose=false) @test should_retry == true @@ -97,20 +96,18 @@ using GitHub # Test secondary rate limit with reset time - use fixed timestamp to avoid race conditions future_time = "1900000000" # Fixed timestamp in the future (year 2030) - body = """{"message": "secondary rate limit exceeded"}""" resp = HTTP.Response(403, [ "x-ratelimit-remaining" => "0", "x-ratelimit-reset" => future_time - ]; body = Vector{UInt8}(body)) + ], secondary_rate_limit_body) should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 5.0; verbose=false) @test should_retry == true @test sleep_seconds > 100000 # Should be a large delay since reset time is far in future end - @testset "Primary rate limit - exponential backoff" begin - - # Primary rate limit without specific headers + @testset "429 - exponential backoff" begin + # 429 without specific headers or body resp = HTTP.Response(429, []) should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 4.0; verbose=false) @@ -123,7 +120,6 @@ using GitHub end @testset "Other HTTP errors" begin - for status in [408, 409, 500, 502, 503, 504, 599] resp = HTTP.Response(status, []) @@ -134,8 +130,7 @@ using GitHub end @testset "Non-retryable client errors" begin - - for status in [400, 401, 404, 422] + for status in [400, 401, 403, 404, 422] resp = HTTP.Response(status, []) should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 1.0; verbose=false) @test should_retry == false @@ -144,25 +139,23 @@ using GitHub end @testset "Invalid header values" begin - - # Test with invalid retry-after header (should fall back to secondary rate limit minimum) - resp1 = HTTP.Response(429, ["retry-after" => "invalid"], Vector{UInt8}("secondary rate limit")) + # Test with invalid retry-after header (should use secondary rate limit minimum) + resp1 = HTTP.Response(429, ["retry-after" => "invalid"], secondary_rate_limit_body) should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp1, nothing, 2.0; verbose=false) @test should_retry == true @test sleep_seconds == 60.0 # Falls back to secondary rate limit minimum (1 minute) - # Test with invalid reset time (should fall back to exponential backoff) + # Test with invalid reset time (should fall back to secondary min) resp2 = HTTP.Response(403, [ "x-ratelimit-remaining" => "0", "x-ratelimit-reset" => "invalid" - ]) + ], secondary_rate_limit_body) should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp2, nothing, 3.0; verbose=false) @test should_retry == true - @test sleep_seconds == 3.0 # Falls back to exponential backoff + @test sleep_seconds == 60.0 # minimum for secondary rate limit end @testset "Rate limit header precedence" begin - # retry-after should take precedence over x-ratelimit-reset future_time = "1900000000" # Fixed timestamp (doesn't matter since retry-after takes precedence) resp = HTTP.Response(429, [ @@ -183,7 +176,7 @@ using GitHub resp = HTTP.Response(403, [ "x-ratelimit-remaining" => "5", "x-ratelimit-reset" => future_time - ]) + ], primary_rate_limit_body) should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 3.0; verbose=false) @test should_retry == true @@ -332,13 +325,13 @@ end current_time = time() reset_time = string(Int(round(current_time)) + 500000000) # 500000000 seconds from now - result = GitHub.with_retries(method="GET", max_retries=1, verbose=false, sleep_fn=test_sleep) do + result = GitHub.with_retries(method="GET", max_retries=1, verbose=false, sleep_fn=test_sleep, max_sleep_seconds=2*500000000) do call_count[] += 1 if call_count[] == 1 return HTTP.Response(403, [ "x-ratelimit-remaining" => "0", "x-ratelimit-reset" => reset_time - ]) + ], primary_rate_limit_body) else return HTTP.Response(200) end @@ -347,7 +340,15 @@ end @test result.status == 200 @test call_count[] ==2 @test length(sleep_calls) == 1 - @test sleep_calls[1] >= 5.0 # Should wait at least until reset time + @test sleep_calls[1] >= 500000000 # Should wait at least until reset time + + + @test_throws RetryDelayException GitHub.with_retries(method="GET", max_retries=1, verbose=false, sleep_fn=test_sleep) do + return HTTP.Response(403, [ + "x-ratelimit-remaining" => "0", + "x-ratelimit-reset" => reset_time + ], primary_rate_limit_body) + end end @testset "Secondary rate limit with retry-after" begin From 7cd00d2fc975432c14f4e65f6cb566d746ba5d04 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Mon, 6 Oct 2025 23:35:05 +0200 Subject: [PATCH 8/9] Update src/utils/requests.jl Co-authored-by: Dilum Aluthge --- src/utils/requests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/requests.jl b/src/utils/requests.jl index 22ef536..4cf0a0d 100644 --- a/src/utils/requests.jl +++ b/src/utils/requests.jl @@ -176,9 +176,9 @@ function github_retry_decision(method::String, resp::Union{HTTP.Response, Nothin # If secondary rate limit hit without headers to guide us, # make sure we wait at least a minute + # Fall back to exponential backoff delay_seconds = is_secondary_rate_limit ? max(60.0, exponential_delay) : exponential_delay - # Fall back to exponential backoff verbose && @info "$msg, retrying in $(round(delay_seconds, digits=1))s" method=method status=status return (true, delay_seconds) From a7e1452762f94834d72ad226c2bf5195b5fc285e Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Mon, 6 Oct 2025 23:34:52 +0200 Subject: [PATCH 9/9] improve logging & add tests --- src/utils/requests.jl | 12 ++++++++---- test/retries.jl | 14 ++++++++++++++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/utils/requests.jl b/src/utils/requests.jl index 4cf0a0d..6ad9b18 100644 --- a/src/utils/requests.jl +++ b/src/utils/requests.jl @@ -66,6 +66,9 @@ function safe_tryparse(args...) end end +# convert a numeric number of seconds to a canonicalized object for printing +to_canon(val_seconds::Number) = Dates.canonicalize(Nanosecond(round(Int, val_seconds*1e9))) + """ github_retry_decision(method::String, resp::Union{HTTP.Response, Nothing}, ex::Union{Exception, Nothing}, exponential_delay::Float64; verbose::Bool=true) -> (should_retry::Bool, sleep_seconds::Float64) @@ -111,7 +114,7 @@ function github_retry_decision(method::String, resp::Union{HTTP.Response, Nothin if ex !== nothing # If there's an exception, check if it's recoverable and if the method is idempotent if HTTP.RetryRequest.isrecoverable(ex) && HTTP.RetryRequest.isidempotent(method) - verbose && @info "GitHub API exception, retrying in $(round(exponential_delay, digits=1))s" method=method exception=ex + verbose && @info "GitHub API exception, retrying in $(to_canon(exponential_delay))" method=method exception=ex delay_seconds=exponential_delay return (true, exponential_delay) end end @@ -159,7 +162,7 @@ function github_retry_decision(method::String, resp::Union{HTTP.Response, Nothin delay_seconds = safe_tryparse(Float64, retry_after) if delay_seconds !== nothing delay_seconds = parse(Float64, retry_after) - verbose && @info "$msg, retrying in $(round(delay_seconds, digits=1))s" method=method status=status limit=limit remaining=remaining used=used reset=reset_time resource=resource retry_after=retry_after + verbose && @info "$msg, retrying in $(to_canon(delay_seconds))" method=method status=status limit=limit remaining=remaining used=used reset=reset_time resource=resource retry_after=retry_after delay_seconds=delay_seconds return (true, delay_seconds) end @@ -169,7 +172,7 @@ function github_retry_decision(method::String, resp::Union{HTTP.Response, Nothin current_time = time() if reset_timestamp > current_time delay_seconds = reset_timestamp - current_time + 1.0 - verbose && @info "$msg, retrying in $(round(delay_seconds, digits=1))s" method=method status=status limit=limit remaining=remaining used=used reset=reset_time resource=resource retry_after=retry_after + verbose && @info "$msg, retrying in $(to_canon(delay_seconds))" method=method status=status limit=limit remaining=remaining used=used reset=reset_time resource=resource retry_after=retry_after delay_seconds=delay_seconds return (true, delay_seconds) end end @@ -179,7 +182,8 @@ function github_retry_decision(method::String, resp::Union{HTTP.Response, Nothin # Fall back to exponential backoff delay_seconds = is_secondary_rate_limit ? max(60.0, exponential_delay) : exponential_delay - verbose && @info "$msg, retrying in $(round(delay_seconds, digits=1))s" method=method status=status + # Fall back to exponential backoff + verbose && @info "$msg, retrying in $(to_canon(delay_seconds))" method=method status=status delay_seconds=delay_seconds return (true, delay_seconds) end diff --git a/test/retries.jl b/test/retries.jl index e6a3a43..b9dd007 100644 --- a/test/retries.jl +++ b/test/retries.jl @@ -1,10 +1,24 @@ using Test using HTTP using GitHub +using Dates primary_rate_limit_body = Vector{UInt8}("primary rate limit") secondary_rate_limit_body = Vector{UInt8}("secondary rate limit") +@testset "to_canon" begin + @test GitHub.to_canon(1) == Second(1) + @test GitHub.to_canon(60) == Minute(1) + @test GitHub.to_canon(90) == Minute(1) + Second(30) + @test GitHub.to_canon(3600) == Hour(1) + @test GitHub.to_canon(1.5) == Second(1) + Millisecond(500) + @test GitHub.to_canon(0) == Second(0) + + # Test verbose logging with canonicalized time + resp = HTTP.Response(429, ["retry-after" => "90"]) + @test_logs (:info, r"retrying in 1 minute, 30 seconds") GitHub.github_retry_decision("GET", resp, nothing, 2.0; verbose=true) +end + @testset "github_retry_decision" begin @testset "HTTP.jl recoverable exceptions" begin