Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 29 additions & 4 deletions src/Agent.Listener/Configuration/CredentialProvider.cs
Original file line number Diff line number Diff line change
Expand Up @@ -91,24 +91,35 @@ public override void EnsureCredential(IHostContext context, CommandSettings comm

private async Task<AuthenticationResult> AcquireATokenFromCacheOrDeviceCodeFlowAsync(IHostContext context, IPublicClientApplication app, IEnumerable<String> scopes)
{
var trace = context.GetTrace(nameof(AadDeviceCodeAccessToken));
trace.Info("Starting AAD token acquisition");

AuthenticationResult result = null;
var accounts = await app.GetAccountsAsync().ConfigureAwait(false);

if (accounts.Any())
{
trace.Info("Attempting silent token acquisition from cache");

// Attempt to get a token from the cache (or refresh it silently if needed)
result = await app.AcquireTokenSilent(scopes, accounts.FirstOrDefault())
.ExecuteAsync().ConfigureAwait(false);

trace.Info("Silent token acquisition completed");
}

// Cache empty or no token for account in the cache, attempt by device code flow
if (result == null)
{
trace.Info("Attempting device code flow token acquisition");

result = await GetTokenUsingDeviceCodeFlowAsync(context, app, scopes).ConfigureAwait(false);

trace.Info("Device code flow token acquisition completed");
}

trace.Info("AAD token acquisition completed");

return result;
}

Expand Down Expand Up @@ -162,20 +173,34 @@ private Uri GetTenantAuthorityUrl(IHostContext context, string serverUrl)
private async Task<AuthenticationResult> GetTokenUsingDeviceCodeFlowAsync(IHostContext context, IPublicClientApplication app, IEnumerable<string> scopes)
{
Tracing trace = context.GetTrace(nameof(AadDeviceCodeAccessToken));
trace.Info("Starting device code flow token acquisition");

AuthenticationResult result;
try
{
trace.Info("Initiating device code flow");

result = await app.AcquireTokenWithDeviceCode(scopes,
deviceCodeCallback =>
{
trace.Info("Device code callback received");

// This will print the message on the console which tells the user where to go sign-in using
// a separate browser and the code to enter once they sign in.
var term = context.GetService<ITerminal>();
term.WriteLine($"Please finish AAD device code flow in browser ({deviceCodeCallback.VerificationUrl}), user code: {deviceCodeCallback.UserCode}"); return Task.FromResult(0);
term.WriteLine($"Please finish AAD device code flow in browser ({deviceCodeCallback.VerificationUrl}), user code: {deviceCodeCallback.UserCode}");

trace.Info($"Waiting for user authentication at {deviceCodeCallback.VerificationUrl}");

return Task.FromResult(0);
}).ExecuteAsync().ConfigureAwait(false);

trace.Info("Device code flow authentication completed successfully");
}
catch (MsalServiceException)
catch (MsalServiceException msalEx)
{
trace.Error($"Device code flow failed with MSAL service exception: {msalEx.Message}");

// AADSTS50059: No tenant-identifying information found in either the request or implied by any provided credentials.
// AADSTS90133: Device Code flow is not supported under /common or /consumers endpoint.
// AADSTS90002: Tenant <tenantId or domain you used in the authority> not found. This may happen if there are
Expand All @@ -184,12 +209,12 @@ private async Task<AuthenticationResult> GetTokenUsingDeviceCodeFlowAsync(IHostC
}
catch (OperationCanceledException ex)
{
trace.Warning(ex.Message);
trace.Warning($"Device code flow cancelled: {ex.Message}");
result = null;
}
catch (MsalClientException ex)
{
trace.Warning(ex.Message);
trace.Warning($"Device code flow failed with MSAL client exception: {ex.Message}");
result = null;
}
return result;
Expand Down
8 changes: 8 additions & 0 deletions src/Agent.Listener/Configuration/OAuthCredential.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ public override void EnsureCredential(
public override VssCredentials GetVssCredentials(IHostContext context)
{
ArgUtil.NotNull(context, nameof(context));

var trace = context.GetTrace(nameof(OAuthCredential));
trace.Info("Starting OAuth credential creation");

var clientId = this.CredentialData.Data.GetValueOrDefault("clientId", null);
var authorizationUrl = this.CredentialData.Data.GetValueOrDefault("authorizationUrl", null);
Expand All @@ -39,13 +42,18 @@ public override VssCredentials GetVssCredentials(IHostContext context)
ArgUtil.NotNullOrEmpty(clientId, nameof(clientId));
ArgUtil.NotNullOrEmpty(authorizationUrl, nameof(authorizationUrl));

trace.Info($"OAuth endpoint: {oathEndpointUrl}");
trace.Info($"OAuth authorization URL: {authorizationUrl}");

// We expect the key to be in the machine store at this point. Configuration should have set all of
// this up correctly so we can use the key to generate access tokens.
var keyManager = context.GetService<IRSAKeyManager>();
var signingCredentials = VssSigningCredentials.Create(() => keyManager.GetKey());
var clientCredential = new VssOAuthJwtBearerClientCredential(clientId, authorizationUrl, signingCredentials);
var agentCredential = new VssOAuthCredential(new Uri(oathEndpointUrl, UriKind.Absolute), VssOAuthGrant.ClientCredentials, clientCredential);

trace.Info("OAuth credential creation completed");

// Construct a credentials cache with a single OAuth credential for communication. The windows credential
// is explicitly set to null to ensure we never do that negotiation.
return new VssCredentials(null, agentCredential, CredentialPromptType.DoNotPrompt);
Expand Down
2 changes: 1 addition & 1 deletion src/Agent.Sdk/Knob/AgentKnobs.cs
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ public class AgentKnobs
nameof(HttpTrace),
"Enable http trace if true",
new EnvironmentKnobSource("VSTS_AGENT_HTTPTRACE"),
new BuiltInDefaultKnobSource("false"));
new BuiltInDefaultKnobSource("true"));

public static readonly Knob NoProxy = new Knob(
nameof(NoProxy),
Expand Down
16 changes: 16 additions & 0 deletions src/Agent.Sdk/Util/VssUtil.cs
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,22 @@ public static VssConnection CreateConnection(
int httpRequestTimeoutSeconds = AgentKnobs.HttpTimeout.GetValue(_knobContext).AsInt();
settings.SendTimeout = timeout ?? TimeSpan.FromSeconds(Math.Min(Math.Max(httpRequestTimeoutSeconds, 100), 1200));

// Basic connection diagnostics
if (trace != null)
{
trace.Info($"Creating VssConnection to {serverUri} (timeout: {settings.SendTimeout.TotalSeconds}s)");

// Log critical socket exhaustion indicators
if (PlatformUtil.UseLegacyHttpHandler)
{
trace.Info("Using legacy HTTP handler - isolated connection pools");
}
else
{
trace.Info("Using modern SocketsHttpHandler - shared connection pool");
}
}

// Remove Invariant from the list of accepted languages.
//
// The constructor of VssHttpRequestSettings (base class of VssClientHttpRequestSettings) adds the current
Expand Down
32 changes: 32 additions & 0 deletions src/Agent.Worker/JobRunner.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@
using Microsoft.VisualStudio.Services.WebApi;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Net.NetworkInformation;
using System.Threading;
using System.Threading.Tasks;
using System.Net.Http;
Expand Down Expand Up @@ -392,6 +394,10 @@ public async Task<TaskResult> RunAsync(Pipelines.AgentJobRequestMessage message,
// Run all job steps
Trace.Info("Run all job steps.");
var stepsRunner = HostContext.GetService<IStepsRunner>();

// Start periodic diagnostics logging during job execution
var diagnosticsTimer = new Timer(LogDiagnostics, jobContext, (int)TimeSpan.FromMinutes(10).TotalMilliseconds, (int)TimeSpan.FromMinutes(10).TotalMilliseconds);

try
{
Trace.Info("Step execution pipeline initiated - beginning job steps execution with StepsRunner");
Expand All @@ -409,6 +415,8 @@ public async Task<TaskResult> RunAsync(Pipelines.AgentJobRequestMessage message,
}
finally
{
// Stop periodic diagnostics logging
diagnosticsTimer?.Dispose();
Trace.Info("Finalize job.");
await jobExtension.FinalizeJob(jobContext);
}
Expand Down Expand Up @@ -705,5 +713,29 @@ private void PublishTelemetry(IExecutionContext context, string area, String fea
Trace.Warning($"Unable to publish agent shutdown telemetry data. Exception: {ex}");
}
}

private void LogDiagnostics(object state)
{
try
{
var jobContext = state as IExecutionContext;
if (jobContext == null) return;

// Log network statistics using IPGlobalProperties
var ipProperties = IPGlobalProperties.GetIPGlobalProperties();
var tcpStats = ipProperties.GetTcpIPv4Statistics();

// Log memory usage
var process = Process.GetCurrentProcess();
var gcMemory = GC.GetTotalMemory(false);

jobContext.Output($"[DIAGNOSTICS] TCP Connections: {tcpStats.CurrentConnections}, Memory: WorkingSet={process.WorkingSet64 / 1024 / 1024}MB, GC={gcMemory / 1024 / 1024}MB");
}
catch (Exception ex)
{
// Don't let diagnostics logging crash the job
Trace.Warning($"Failed to log diagnostics: {ex.Message}");
}
}
}
}
Loading
Loading