From d06f21b835becfd0d0f739eccafc5b9019566a93 Mon Sep 17 00:00:00 2001 From: amadeuszl Date: Tue, 10 Jun 2025 11:17:04 +0200 Subject: [PATCH 1/4] Add RetryingLinuxUtilizationParser --- .../Linux/RetryingLinuxUtilizationParser.cs | 90 +++++++++++++++++++ ...ceMonitoringServiceCollectionExtensions.cs | 21 +++-- .../Linux/LinuxUtilizationProviderTests.cs | 52 +++++++++++ 3 files changed, 156 insertions(+), 7 deletions(-) create mode 100644 src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/RetryingLinuxUtilizationParser.cs diff --git a/src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/RetryingLinuxUtilizationParser.cs b/src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/RetryingLinuxUtilizationParser.cs new file mode 100644 index 00000000000..9ed3c57b1ea --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/RetryingLinuxUtilizationParser.cs @@ -0,0 +1,90 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.IO; +using System.Threading; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.Diagnostics.ResourceMonitoring.Linux; + +internal sealed class RetryingLinuxUtilizationParser : ILinuxUtilizationParser +{ + private readonly ILinuxUtilizationParser _inner; + private readonly TimeSpan _retryInterval = TimeSpan.FromMinutes(5); + private readonly TimeProvider _timeProvider; + private DateTimeOffset _lastFailure = DateTimeOffset.MinValue; + private int _unavailable; + + public RetryingLinuxUtilizationParser(ILinuxUtilizationParser inner, TimeProvider timeProvider) + { + _inner = Throw.IfNull(inner); + _timeProvider = Throw.IfNull(timeProvider); + } + + public ulong GetAvailableMemoryInBytes() => + Retry(() => _inner.GetAvailableMemoryInBytes()); + + public long GetCgroupCpuUsageInNanoseconds() => + Retry(() => _inner.GetCgroupCpuUsageInNanoseconds()); + + public (long cpuUsageNanoseconds, long elapsedPeriods) GetCgroupCpuUsageInNanosecondsAndCpuPeriodsV2() => + Retry(() => _inner.GetCgroupCpuUsageInNanosecondsAndCpuPeriodsV2()); + + public float GetCgroupLimitedCpus() => + Retry(() => _inner.GetCgroupLimitedCpus()); + + public float GetCgroupLimitV2() => + Retry(() => _inner.GetCgroupLimitV2()); + + public ulong GetHostAvailableMemory() => + Retry(() => _inner.GetHostAvailableMemory()); + + public float GetHostCpuCount() => + Retry(() => _inner.GetHostCpuCount()); + + public long GetHostCpuUsageInNanoseconds() => + Retry(() => _inner.GetHostCpuUsageInNanoseconds()); + + public ulong GetMemoryUsageInBytes() => + Retry(() => _inner.GetMemoryUsageInBytes()); + + public float GetCgroupRequestCpu() => + Retry(() => _inner.GetCgroupRequestCpu()); + + public float GetCgroupRequestCpuV2() => + Retry(() => _inner.GetCgroupRequestCpuV2()); + + public long GetCgroupPeriodsIntervalInMicroSecondsV2() => + Retry(() => _inner.GetCgroupPeriodsIntervalInMicroSecondsV2()); + +#pragma warning disable CS8603 // Possible null reference return. It will return 0 or 0.0f + private T Retry(Func func) + { + if (Volatile.Read(ref _unavailable) == 1 && _timeProvider.GetUtcNow() - _lastFailure < _retryInterval) + { + return default; + } + + try + { + var result = func(); + if (Volatile.Read(ref _unavailable) == 1) + { + _ = Interlocked.Exchange(ref _unavailable, 0); + } + + return result; + } + catch (Exception ex) when ( + ex is FileNotFoundException || + ex is DirectoryNotFoundException || + ex is UnauthorizedAccessException) + { + _lastFailure = _timeProvider.GetUtcNow(); + _ = Interlocked.Exchange(ref _unavailable, 1); + return default; + } + } +#pragma warning restore CS8603 // Possible null reference return. +} diff --git a/src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/ResourceMonitoringServiceCollectionExtensions.cs b/src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/ResourceMonitoringServiceCollectionExtensions.cs index 541984db78d..3587dbe4739 100644 --- a/src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/ResourceMonitoringServiceCollectionExtensions.cs +++ b/src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/ResourceMonitoringServiceCollectionExtensions.cs @@ -149,14 +149,21 @@ private static ResourceMonitorBuilder AddLinuxProvider(this ResourceMonitorBuild private static void PickLinuxParser(this ResourceMonitorBuilder builder) { var injectParserV2 = ResourceMonitoringLinuxCgroupVersion.GetCgroupType(); - if (injectParserV2) - { - builder.Services.TryAddSingleton(); - } - else + + builder.Services.TryAddSingleton(sp => { - builder.Services.TryAddSingleton(); - } + ILinuxUtilizationParser innerParser; + if (injectParserV2) + { + innerParser = new LinuxUtilizationParserCgroupV2(sp.GetRequiredService(), sp.GetRequiredService()); + } + else + { + innerParser = new LinuxUtilizationParserCgroupV1(sp.GetRequiredService(), sp.GetRequiredService()); + } + + return new RetryingLinuxUtilizationParser(innerParser, sp.GetRequiredService()); + }); } #endif } diff --git a/test/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring.Tests/Linux/LinuxUtilizationProviderTests.cs b/test/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring.Tests/Linux/LinuxUtilizationProviderTests.cs index e6e9a282eca..243c74ff035 100644 --- a/test/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring.Tests/Linux/LinuxUtilizationProviderTests.cs +++ b/test/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring.Tests/Linux/LinuxUtilizationProviderTests.cs @@ -9,6 +9,7 @@ using System.Threading.Tasks; using Microsoft.Extensions.Diagnostics.ResourceMonitoring.Test.Helpers; using Microsoft.Extensions.Logging.Testing; +using Microsoft.Extensions.Time.Testing; using Microsoft.Shared.Instruments; using Microsoft.TestUtilities; using Moq; @@ -272,4 +273,55 @@ public void Provider_Registers_Instruments_CgroupV2_WithoutHostCpu() Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ProcessMemoryUtilization); Assert.Equal(1, samples.Single(i => i.instrument.Name == ResourceUtilizationInstruments.ProcessMemoryUtilization).value); } + + [Fact] + public void Provider_Uses_RetryingLinuxUtilizationParser_CgroupV1() + { + var logger = new FakeLogger(); + var options = Options.Options.Create(new ResourceMonitoringOptions()); + using var meter = new Meter(nameof(Provider_Uses_RetryingLinuxUtilizationParser_CgroupV1)); + var meterFactoryMock = new Mock(); + meterFactoryMock.Setup(x => x.Create(It.IsAny())).Returns(meter); + + var innerParserMock = new Mock(); + int callCount = 0; + innerParserMock.Setup(p => p.GetAvailableMemoryInBytes()) // TODO: it makes SystemResources in ctor to receive 0f which breaks the behavior + .Returns(() => + { + if (callCount++ == 0) + { + throw new FileNotFoundException(); + } + + return 1234UL; + }); + + innerParserMock.Setup(p => p.GetCgroupCpuUsageInNanoseconds()).Returns(0L); + innerParserMock.Setup(p => p.GetCgroupCpuUsageInNanosecondsAndCpuPeriodsV2()).Returns((0L, 0L)); + innerParserMock.Setup(p => p.GetCgroupLimitedCpus()).Returns(1f); + innerParserMock.Setup(p => p.GetCgroupLimitV2()).Returns(1f); + innerParserMock.Setup(p => p.GetHostAvailableMemory()).Returns(1UL); + innerParserMock.Setup(p => p.GetHostCpuCount()).Returns(1f); + innerParserMock.Setup(p => p.GetHostCpuUsageInNanoseconds()).Returns(0L); + innerParserMock.Setup(p => p.GetMemoryUsageInBytes()).Returns(1UL); + innerParserMock.Setup(p => p.GetCgroupRequestCpu()).Returns(1f); + innerParserMock.Setup(p => p.GetCgroupRequestCpuV2()).Returns(1f); + innerParserMock.Setup(p => p.GetCgroupPeriodsIntervalInMicroSecondsV2()).Returns(1L); + + var timeProvider = new FakeTimeProvider(DateTimeOffset.UtcNow); + var retryingParser = new RetryingLinuxUtilizationParser(innerParserMock.Object, timeProvider); + + var provider = new LinuxUtilizationProvider(options, retryingParser, meterFactoryMock.Object, logger, timeProvider); + + var first = retryingParser.GetAvailableMemoryInBytes(); + timeProvider.Advance(TimeSpan.FromMinutes(1)); + var second = retryingParser.GetAvailableMemoryInBytes(); + timeProvider.Advance(TimeSpan.FromMinutes(6)); + var third = retryingParser.GetAvailableMemoryInBytes(); + + Assert.Equal(0UL, first); + Assert.Equal(0UL, second); + Assert.Equal(1234UL, third); + Assert.Equal(3, callCount); + } } From c22d0eed1f4b507ef0e5e2c1051228883943dcfe Mon Sep 17 00:00:00 2001 From: amadeuszl Date: Fri, 20 Jun 2025 14:14:25 +0200 Subject: [PATCH 2/4] Switch approach to use retries in Provider class --- .../Linux/LinuxUtilizationProvider.cs | 77 +++++++++++++-- .../Linux/RetryingLinuxUtilizationParser.cs | 90 ------------------ ...ceMonitoringServiceCollectionExtensions.cs | 20 ++-- .../Linux/LinuxUtilizationProviderTests.cs | 94 +++++++++++-------- 4 files changed, 132 insertions(+), 149 deletions(-) delete mode 100644 src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/RetryingLinuxUtilizationParser.cs diff --git a/src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/LinuxUtilizationProvider.cs b/src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/LinuxUtilizationProvider.cs index c6dde5c0da1..90636eb23b2 100644 --- a/src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/LinuxUtilizationProvider.cs +++ b/src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/LinuxUtilizationProvider.cs @@ -2,7 +2,10 @@ // The .NET Foundation licenses this file to you under the MIT license. using System; +using System.Collections.Generic; using System.Diagnostics.Metrics; +using System.Linq; +using System.Threading; using Microsoft.Extensions.Logging; using Microsoft.Extensions.Logging.Abstractions; using Microsoft.Extensions.Options; @@ -33,6 +36,10 @@ internal sealed class LinuxUtilizationProvider : ISnapshotProvider private readonly double _scaleRelativeToCpuRequest; private readonly double _scaleRelativeToCpuRequestForTrackerApi; + private readonly TimeSpan _retryInterval = TimeSpan.FromMinutes(5); + private DateTimeOffset _lastFailure = DateTimeOffset.MinValue; + private int _measurementsUnavailable; + private DateTimeOffset _refreshAfterCpu; private DateTimeOffset _refreshAfterMemory; @@ -94,18 +101,44 @@ public LinuxUtilizationProvider(IOptions options, ILi // Initialize the counters _cpuUtilizationLimit100PercentExceededCounter = meter.CreateCounter("cpu_utilization_limit_100_percent_exceeded"); _cpuUtilizationLimit110PercentExceededCounter = meter.CreateCounter("cpu_utilization_limit_110_percent_exceeded"); - _ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerCpuLimitUtilization, observeValue: () => CpuUtilizationLimit(cpuLimit), unit: "1"); - _ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerCpuRequestUtilization, observeValue: () => CpuUtilizationWithoutHostDelta() / cpuRequest, unit: "1"); + + _ = meter.CreateObservableGauge( + ResourceUtilizationInstruments.ContainerCpuLimitUtilization, + () => GetMeasurementWithRetry(() => CpuUtilizationLimit(cpuLimit)), + "1"); + + _ = meter.CreateObservableGauge( + name: ResourceUtilizationInstruments.ContainerCpuRequestUtilization, + observeValues: () => GetMeasurementWithRetry(() => CpuUtilizationWithoutHostDelta() / cpuRequest), + unit: "1"); } else { - _ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerCpuLimitUtilization, observeValue: () => CpuUtilization() * _scaleRelativeToCpuLimit, unit: "1"); - _ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerCpuRequestUtilization, observeValue: () => CpuUtilization() * _scaleRelativeToCpuRequest, unit: "1"); - _ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ProcessCpuUtilization, observeValue: () => CpuUtilization() * _scaleRelativeToCpuRequest, unit: "1"); + _ = meter.CreateObservableGauge( + name: ResourceUtilizationInstruments.ContainerCpuLimitUtilization, + observeValues: () => GetMeasurementWithRetry(() => CpuUtilization() * _scaleRelativeToCpuLimit), + unit: "1"); + + _ = meter.CreateObservableGauge( + name: ResourceUtilizationInstruments.ContainerCpuRequestUtilization, + observeValues: () => GetMeasurementWithRetry(() => CpuUtilization() * _scaleRelativeToCpuRequest), + unit: "1"); + + _ = meter.CreateObservableGauge( + name: ResourceUtilizationInstruments.ProcessCpuUtilization, + observeValues: () => GetMeasurementWithRetry(() => CpuUtilization() * _scaleRelativeToCpuRequest), + unit: "1"); } - _ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ContainerMemoryLimitUtilization, observeValue: MemoryUtilization, unit: "1"); - _ = meter.CreateObservableGauge(name: ResourceUtilizationInstruments.ProcessMemoryUtilization, observeValue: MemoryUtilization, unit: "1"); + _ = meter.CreateObservableGauge( + name: ResourceUtilizationInstruments.ContainerMemoryLimitUtilization, + observeValues: () => GetMeasurementWithRetry(() => MemoryUtilization()), + unit: "1"); + + _ = meter.CreateObservableGauge( + name: ResourceUtilizationInstruments.ProcessMemoryUtilization, + observeValues: () => GetMeasurementWithRetry(() => MemoryUtilization()), + unit: "1"); // cpuRequest is a CPU request (aka guaranteed number of CPU units) for pod, for host its 1 core // cpuLimit is a CPU limit (aka max CPU units available) for a pod or for a host. @@ -288,4 +321,34 @@ public Snapshot GetSnapshot() userTimeSinceStart: TimeSpan.FromTicks((long)(cgroupTime / Hundred * _scaleRelativeToCpuRequestForTrackerApi)), memoryUsageInBytes: memoryUsed); } + + private IEnumerable> GetMeasurementWithRetry(Func func) + { + if (Volatile.Read(ref _measurementsUnavailable) == 1 && + _timeProvider.GetUtcNow() - _lastFailure < _retryInterval) + { + return Enumerable.Empty>(); + } + + try + { + var result = func(); + if (Volatile.Read(ref _measurementsUnavailable) == 1) + { + _ = Interlocked.Exchange(ref _measurementsUnavailable, 0); + } + + return new[] { new Measurement(result) }; + } + catch (Exception ex) when ( + ex is System.IO.FileNotFoundException || + ex is System.IO.DirectoryNotFoundException || + ex is System.UnauthorizedAccessException) + { + _lastFailure = _timeProvider.GetUtcNow(); + _ = Interlocked.Exchange(ref _measurementsUnavailable, 1); + + return Enumerable.Empty>(); + } + } } diff --git a/src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/RetryingLinuxUtilizationParser.cs b/src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/RetryingLinuxUtilizationParser.cs deleted file mode 100644 index 9ed3c57b1ea..00000000000 --- a/src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/RetryingLinuxUtilizationParser.cs +++ /dev/null @@ -1,90 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -using System; -using System.IO; -using System.Threading; -using Microsoft.Shared.Diagnostics; - -namespace Microsoft.Extensions.Diagnostics.ResourceMonitoring.Linux; - -internal sealed class RetryingLinuxUtilizationParser : ILinuxUtilizationParser -{ - private readonly ILinuxUtilizationParser _inner; - private readonly TimeSpan _retryInterval = TimeSpan.FromMinutes(5); - private readonly TimeProvider _timeProvider; - private DateTimeOffset _lastFailure = DateTimeOffset.MinValue; - private int _unavailable; - - public RetryingLinuxUtilizationParser(ILinuxUtilizationParser inner, TimeProvider timeProvider) - { - _inner = Throw.IfNull(inner); - _timeProvider = Throw.IfNull(timeProvider); - } - - public ulong GetAvailableMemoryInBytes() => - Retry(() => _inner.GetAvailableMemoryInBytes()); - - public long GetCgroupCpuUsageInNanoseconds() => - Retry(() => _inner.GetCgroupCpuUsageInNanoseconds()); - - public (long cpuUsageNanoseconds, long elapsedPeriods) GetCgroupCpuUsageInNanosecondsAndCpuPeriodsV2() => - Retry(() => _inner.GetCgroupCpuUsageInNanosecondsAndCpuPeriodsV2()); - - public float GetCgroupLimitedCpus() => - Retry(() => _inner.GetCgroupLimitedCpus()); - - public float GetCgroupLimitV2() => - Retry(() => _inner.GetCgroupLimitV2()); - - public ulong GetHostAvailableMemory() => - Retry(() => _inner.GetHostAvailableMemory()); - - public float GetHostCpuCount() => - Retry(() => _inner.GetHostCpuCount()); - - public long GetHostCpuUsageInNanoseconds() => - Retry(() => _inner.GetHostCpuUsageInNanoseconds()); - - public ulong GetMemoryUsageInBytes() => - Retry(() => _inner.GetMemoryUsageInBytes()); - - public float GetCgroupRequestCpu() => - Retry(() => _inner.GetCgroupRequestCpu()); - - public float GetCgroupRequestCpuV2() => - Retry(() => _inner.GetCgroupRequestCpuV2()); - - public long GetCgroupPeriodsIntervalInMicroSecondsV2() => - Retry(() => _inner.GetCgroupPeriodsIntervalInMicroSecondsV2()); - -#pragma warning disable CS8603 // Possible null reference return. It will return 0 or 0.0f - private T Retry(Func func) - { - if (Volatile.Read(ref _unavailable) == 1 && _timeProvider.GetUtcNow() - _lastFailure < _retryInterval) - { - return default; - } - - try - { - var result = func(); - if (Volatile.Read(ref _unavailable) == 1) - { - _ = Interlocked.Exchange(ref _unavailable, 0); - } - - return result; - } - catch (Exception ex) when ( - ex is FileNotFoundException || - ex is DirectoryNotFoundException || - ex is UnauthorizedAccessException) - { - _lastFailure = _timeProvider.GetUtcNow(); - _ = Interlocked.Exchange(ref _unavailable, 1); - return default; - } - } -#pragma warning restore CS8603 // Possible null reference return. -} diff --git a/src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/ResourceMonitoringServiceCollectionExtensions.cs b/src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/ResourceMonitoringServiceCollectionExtensions.cs index 3587dbe4739..e0451ff0a72 100644 --- a/src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/ResourceMonitoringServiceCollectionExtensions.cs +++ b/src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/ResourceMonitoringServiceCollectionExtensions.cs @@ -150,20 +150,14 @@ private static void PickLinuxParser(this ResourceMonitorBuilder builder) { var injectParserV2 = ResourceMonitoringLinuxCgroupVersion.GetCgroupType(); - builder.Services.TryAddSingleton(sp => + if (injectParserV2) { - ILinuxUtilizationParser innerParser; - if (injectParserV2) - { - innerParser = new LinuxUtilizationParserCgroupV2(sp.GetRequiredService(), sp.GetRequiredService()); - } - else - { - innerParser = new LinuxUtilizationParserCgroupV1(sp.GetRequiredService(), sp.GetRequiredService()); - } - - return new RetryingLinuxUtilizationParser(innerParser, sp.GetRequiredService()); - }); + builder.Services.TryAddSingleton(); + } + else + { + builder.Services.TryAddSingleton(); + } } #endif } diff --git a/test/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring.Tests/Linux/LinuxUtilizationProviderTests.cs b/test/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring.Tests/Linux/LinuxUtilizationProviderTests.cs index 243c74ff035..61c8014fa17 100644 --- a/test/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring.Tests/Linux/LinuxUtilizationProviderTests.cs +++ b/test/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring.Tests/Linux/LinuxUtilizationProviderTests.cs @@ -275,53 +275,69 @@ public void Provider_Registers_Instruments_CgroupV2_WithoutHostCpu() } [Fact] - public void Provider_Uses_RetryingLinuxUtilizationParser_CgroupV1() + public void Provider_GetMeasurementWithRetry_HandlesExceptionAndRecovers() { + var meterName = Guid.NewGuid().ToString(); var logger = new FakeLogger(); var options = Options.Options.Create(new ResourceMonitoringOptions()); - using var meter = new Meter(nameof(Provider_Uses_RetryingLinuxUtilizationParser_CgroupV1)); + using var meter = new Meter(nameof(Provider_GetMeasurementWithRetry_HandlesExceptionAndRecovers)); var meterFactoryMock = new Mock(); - meterFactoryMock.Setup(x => x.Create(It.IsAny())).Returns(meter); + meterFactoryMock.Setup(x => x.Create(It.IsAny())) + .Returns(meter); - var innerParserMock = new Mock(); - int callCount = 0; - innerParserMock.Setup(p => p.GetAvailableMemoryInBytes()) // TODO: it makes SystemResources in ctor to receive 0f which breaks the behavior - .Returns(() => + // Setup a parser that throws on first two calls, then returns 0.42 + var callCount = 0; + var parserMock = new Mock(); + parserMock.Setup(p => p.GetMemoryUsageInBytes()).Returns(() => + { + callCount++; + if (callCount <= 1) { - if (callCount++ == 0) + throw new FileNotFoundException("Simulated failure"); + } + + return 420UL; + }); + parserMock.Setup(p => p.GetAvailableMemoryInBytes()).Returns(1000UL); + parserMock.Setup(p => p.GetCgroupRequestCpu()).Returns(10f); + parserMock.Setup(p => p.GetCgroupLimitedCpus()).Returns(12f); + + var fakeTime = new FakeTimeProvider(DateTimeOffset.UtcNow); + var provider = new LinuxUtilizationProvider(options, parserMock.Object, meterFactoryMock.Object, logger, fakeTime); + + using var listener = new MeterListener + { + InstrumentPublished = (instrument, listener) => + { + if (ReferenceEquals(meter, instrument.Meter)) { - throw new FileNotFoundException(); + listener.EnableMeasurementEvents(instrument); } + } + }; + + var samples = new List<(Instrument instrument, double value)>(); + listener.SetMeasurementEventCallback((instrument, value, _, _) => + { + if (ReferenceEquals(meter, instrument.Meter)) + { + samples.Add((instrument, value)); + } + }); + + listener.Start(); + listener.RecordObservableInstruments(); + Assert.DoesNotContain(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ProcessMemoryUtilization); + + fakeTime.Advance(TimeSpan.FromMinutes(1)); + listener.RecordObservableInstruments(); + Assert.DoesNotContain(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ProcessMemoryUtilization); + + fakeTime.Advance(TimeSpan.FromMinutes(5)); + listener.RecordObservableInstruments(); + var metric = samples.SingleOrDefault(x => x.instrument.Name == ResourceUtilizationInstruments.ProcessMemoryUtilization); + Assert.Equal(0.42, metric.value); - return 1234UL; - }); - - innerParserMock.Setup(p => p.GetCgroupCpuUsageInNanoseconds()).Returns(0L); - innerParserMock.Setup(p => p.GetCgroupCpuUsageInNanosecondsAndCpuPeriodsV2()).Returns((0L, 0L)); - innerParserMock.Setup(p => p.GetCgroupLimitedCpus()).Returns(1f); - innerParserMock.Setup(p => p.GetCgroupLimitV2()).Returns(1f); - innerParserMock.Setup(p => p.GetHostAvailableMemory()).Returns(1UL); - innerParserMock.Setup(p => p.GetHostCpuCount()).Returns(1f); - innerParserMock.Setup(p => p.GetHostCpuUsageInNanoseconds()).Returns(0L); - innerParserMock.Setup(p => p.GetMemoryUsageInBytes()).Returns(1UL); - innerParserMock.Setup(p => p.GetCgroupRequestCpu()).Returns(1f); - innerParserMock.Setup(p => p.GetCgroupRequestCpuV2()).Returns(1f); - innerParserMock.Setup(p => p.GetCgroupPeriodsIntervalInMicroSecondsV2()).Returns(1L); - - var timeProvider = new FakeTimeProvider(DateTimeOffset.UtcNow); - var retryingParser = new RetryingLinuxUtilizationParser(innerParserMock.Object, timeProvider); - - var provider = new LinuxUtilizationProvider(options, retryingParser, meterFactoryMock.Object, logger, timeProvider); - - var first = retryingParser.GetAvailableMemoryInBytes(); - timeProvider.Advance(TimeSpan.FromMinutes(1)); - var second = retryingParser.GetAvailableMemoryInBytes(); - timeProvider.Advance(TimeSpan.FromMinutes(6)); - var third = retryingParser.GetAvailableMemoryInBytes(); - - Assert.Equal(0UL, first); - Assert.Equal(0UL, second); - Assert.Equal(1234UL, third); - Assert.Equal(3, callCount); + parserMock.Verify(p => p.GetMemoryUsageInBytes(), Times.Exactly(2)); } } From 933783b44a581fb8bce8587fef9250dac0318fe1 Mon Sep 17 00:00:00 2001 From: amadeuszl Date: Fri, 20 Jun 2025 15:31:13 +0200 Subject: [PATCH 3/4] Refactor and add test --- .../Linux/LinuxUtilizationProvider.cs | 2 +- .../Linux/LinuxUtilizationProviderTests.cs | 66 ++++++++++++++++++- 2 files changed, 65 insertions(+), 3 deletions(-) diff --git a/src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/LinuxUtilizationProvider.cs b/src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/LinuxUtilizationProvider.cs index 0426654e985..af13b8100ba 100644 --- a/src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/LinuxUtilizationProvider.cs +++ b/src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/LinuxUtilizationProvider.cs @@ -332,7 +332,7 @@ private IEnumerable> GetMeasurementWithRetry(Func fu try { - var result = func(); + double result = func(); if (Volatile.Read(ref _measurementsUnavailable) == 1) { _ = Interlocked.Exchange(ref _measurementsUnavailable, 0); diff --git a/test/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring.Tests/Linux/LinuxUtilizationProviderTests.cs b/test/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring.Tests/Linux/LinuxUtilizationProviderTests.cs index 61c8014fa17..6ee3c40d44d 100644 --- a/test/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring.Tests/Linux/LinuxUtilizationProviderTests.cs +++ b/test/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring.Tests/Linux/LinuxUtilizationProviderTests.cs @@ -285,7 +285,6 @@ public void Provider_GetMeasurementWithRetry_HandlesExceptionAndRecovers() meterFactoryMock.Setup(x => x.Create(It.IsAny())) .Returns(meter); - // Setup a parser that throws on first two calls, then returns 0.42 var callCount = 0; var parserMock = new Mock(); parserMock.Setup(p => p.GetMemoryUsageInBytes()).Returns(() => @@ -293,7 +292,7 @@ public void Provider_GetMeasurementWithRetry_HandlesExceptionAndRecovers() callCount++; if (callCount <= 1) { - throw new FileNotFoundException("Simulated failure"); + throw new FileNotFoundException("Simulated failure to read file"); } return 420UL; @@ -340,4 +339,67 @@ public void Provider_GetMeasurementWithRetry_HandlesExceptionAndRecovers() parserMock.Verify(p => p.GetMemoryUsageInBytes(), Times.Exactly(2)); } + + [Fact] + public void Provider_GetMeasurementWithRetry_UnhandledException_DoesNotBlockFutureReads() + { + var meterName = Guid.NewGuid().ToString(); + var logger = new FakeLogger(); + var options = Options.Options.Create(new ResourceMonitoringOptions()); + using var meter = new Meter(nameof(Provider_GetMeasurementWithRetry_UnhandledException_DoesNotBlockFutureReads)); + var meterFactoryMock = new Mock(); + meterFactoryMock.Setup(x => x.Create(It.IsAny())) + .Returns(meter); + + var callCount = 0; + var parserMock = new Mock(); + parserMock.Setup(p => p.GetMemoryUsageInBytes()).Returns(() => + { + callCount++; + if (callCount <= 2) + { + throw new InvalidOperationException("Simulated unhandled exception"); + } + + return 1234UL; + }); + parserMock.Setup(p => p.GetAvailableMemoryInBytes()).Returns(2000UL); + parserMock.Setup(p => p.GetCgroupRequestCpu()).Returns(10f); + parserMock.Setup(p => p.GetCgroupLimitedCpus()).Returns(12f); + + var fakeTime = new FakeTimeProvider(DateTimeOffset.UtcNow); + var provider = new LinuxUtilizationProvider(options, parserMock.Object, meterFactoryMock.Object, logger, fakeTime); + + using var listener = new MeterListener + { + InstrumentPublished = (instrument, listener) => + { + if (ReferenceEquals(meter, instrument.Meter)) + { + listener.EnableMeasurementEvents(instrument); + } + } + }; + + var samples = new List<(Instrument instrument, double value)>(); + listener.SetMeasurementEventCallback((instrument, value, _, _) => + { + if (ReferenceEquals(meter, instrument.Meter)) + { + samples.Add((instrument, value)); + } + }); + + listener.Start(); + + Assert.Throws(() => listener.RecordObservableInstruments()); + Assert.DoesNotContain(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ProcessMemoryUtilization); + + fakeTime.Advance(TimeSpan.FromMinutes(1)); + listener.RecordObservableInstruments(); + var metric = samples.SingleOrDefault(x => x.instrument.Name == ResourceUtilizationInstruments.ProcessMemoryUtilization); + Assert.Equal(1234f / 2000f, metric.value, 0.01f); + + parserMock.Verify(p => p.GetMemoryUsageInBytes(), Times.Exactly(3)); + } } From f14132839ec49899a876a5a7e9d591cc34be3c94 Mon Sep 17 00:00:00 2001 From: amadeuszl Date: Fri, 20 Jun 2025 15:33:24 +0200 Subject: [PATCH 4/4] Refactor --- .../ResourceMonitoringServiceCollectionExtensions.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/ResourceMonitoringServiceCollectionExtensions.cs b/src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/ResourceMonitoringServiceCollectionExtensions.cs index e0451ff0a72..541984db78d 100644 --- a/src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/ResourceMonitoringServiceCollectionExtensions.cs +++ b/src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/ResourceMonitoringServiceCollectionExtensions.cs @@ -149,7 +149,6 @@ private static ResourceMonitorBuilder AddLinuxProvider(this ResourceMonitorBuild private static void PickLinuxParser(this ResourceMonitorBuilder builder) { var injectParserV2 = ResourceMonitoringLinuxCgroupVersion.GetCgroupType(); - if (injectParserV2) { builder.Services.TryAddSingleton();