@@ -17,23 +17,16 @@ internal sealed class LinuxUtilizationProvider : ISnapshotProvider
1717{
1818 private const double One = 1.0 ;
1919 private const long Hundred = 100L ;
20- private const double CpuLimitThreshold110Percent = 1.1 ;
2120
22- // Meters to track CPU utilization threshold exceedances
23- private readonly Counter < long > ? _cpuUtilizationLimit100PercentExceededCounter ;
24- private readonly Counter < long > ? _cpuUtilizationLimit110PercentExceededCounter ;
25-
26- private readonly bool _useDeltaNrPeriods ;
2721 private readonly object _cpuLocker = new ( ) ;
2822 private readonly object _memoryLocker = new ( ) ;
2923 private readonly ILogger < LinuxUtilizationProvider > _logger ;
3024 private readonly ILinuxUtilizationParser _parser ;
3125 private readonly ulong _memoryLimit ;
26+ private readonly long _cpuPeriodsInterval ;
3227 private readonly TimeSpan _cpuRefreshInterval ;
3328 private readonly TimeSpan _memoryRefreshInterval ;
3429 private readonly TimeProvider _timeProvider ;
35- private readonly double _scaleRelativeToCpuLimit ;
36- private readonly double _scaleRelativeToCpuRequest ;
3730 private readonly double _scaleRelativeToCpuRequestForTrackerApi ;
3831
3932 private readonly TimeSpan _retryInterval = TimeSpan . FromMinutes ( 5 ) ;
@@ -42,18 +35,11 @@ internal sealed class LinuxUtilizationProvider : ISnapshotProvider
4235
4336 private DateTimeOffset _refreshAfterCpu ;
4437 private DateTimeOffset _refreshAfterMemory ;
45-
46- // Track the actual timestamp when we read CPU values
47- private DateTimeOffset _lastCpuMeasurementTime ;
48-
4938 private double _cpuPercentage = double . NaN ;
5039 private double _lastCpuCoresUsed = double . NaN ;
5140 private double _memoryPercentage ;
5241 private long _previousCgroupCpuTime ;
5342 private long _previousHostCpuTime ;
54- private long _cpuUtilizationLimit100PercentExceeded ;
55- private long _cpuUtilizationLimit110PercentExceeded ;
56- private long _cpuPeriodsInterval ;
5743 private long _previousCgroupCpuPeriodCounter ;
5844 public SystemResources Resources { get ; }
5945
@@ -66,7 +52,6 @@ public LinuxUtilizationProvider(IOptions<ResourceMonitoringOptions> options, ILi
6652 DateTimeOffset now = _timeProvider . GetUtcNow ( ) ;
6753 _cpuRefreshInterval = options . Value . CpuConsumptionRefreshInterval ;
6854 _memoryRefreshInterval = options . Value . MemoryConsumptionRefreshInterval ;
69- _useDeltaNrPeriods = options . Value . UseDeltaNrPeriodsForCpuCalculation ;
7055 _refreshAfterCpu = now ;
7156 _refreshAfterMemory = now ;
7257 _memoryLimit = _parser . GetAvailableMemoryInBytes ( ) ;
@@ -76,8 +61,8 @@ public LinuxUtilizationProvider(IOptions<ResourceMonitoringOptions> options, ILi
7661 float hostCpus = _parser . GetHostCpuCount ( ) ;
7762 float cpuLimit = _parser . GetCgroupLimitedCpus ( ) ;
7863 float cpuRequest = _parser . GetCgroupRequestCpu ( ) ;
79- _scaleRelativeToCpuLimit = hostCpus / cpuLimit ;
80- _scaleRelativeToCpuRequest = hostCpus / cpuRequest ;
64+ float scaleRelativeToCpuLimit = hostCpus / cpuLimit ;
65+ float scaleRelativeToCpuRequest = hostCpus / cpuRequest ;
8166 _scaleRelativeToCpuRequestForTrackerApi = hostCpus ; // the division by cpuRequest is performed later on in the ResourceUtilization class
8267
8368#pragma warning disable CA2000 // Dispose objects before losing scope
@@ -87,46 +72,40 @@ public LinuxUtilizationProvider(IOptions<ResourceMonitoringOptions> options, ILi
8772 var meter = meterFactory . Create ( ResourceUtilizationInstruments . MeterName ) ;
8873#pragma warning restore CA2000 // Dispose objects before losing scope
8974
90- if ( options . Value . CalculateCpuUsageWithoutHostDelta )
75+ if ( options . Value . UseLinuxCalculationV2 )
9176 {
9277 cpuLimit = _parser . GetCgroupLimitV2 ( ) ;
93-
94- // Try to get the CPU request from cgroup
9578 cpuRequest = _parser . GetCgroupRequestCpuV2 ( ) ;
9679
9780 // Get Cpu periods interval from cgroup
9881 _cpuPeriodsInterval = _parser . GetCgroupPeriodsIntervalInMicroSecondsV2 ( ) ;
9982 ( _previousCgroupCpuTime , _previousCgroupCpuPeriodCounter ) = _parser . GetCgroupCpuUsageInNanosecondsAndCpuPeriodsV2 ( ) ;
10083
101- // Initialize the counters
102- _cpuUtilizationLimit100PercentExceededCounter = meter . CreateCounter < long > ( "cpu_utilization_limit_100_percent_exceeded" ) ;
103- _cpuUtilizationLimit110PercentExceededCounter = meter . CreateCounter < long > ( "cpu_utilization_limit_110_percent_exceeded" ) ;
104-
10584 _ = meter . CreateObservableGauge (
10685 ResourceUtilizationInstruments . ContainerCpuLimitUtilization ,
10786 ( ) => GetMeasurementWithRetry ( ( ) => CpuUtilizationLimit ( cpuLimit ) ) ,
10887 "1" ) ;
10988
11089 _ = meter . CreateObservableGauge (
11190 name : ResourceUtilizationInstruments . ContainerCpuRequestUtilization ,
112- observeValues : ( ) => GetMeasurementWithRetry ( ( ) => CpuUtilizationWithoutHostDelta ( ) / cpuRequest ) ,
91+ observeValues : ( ) => GetMeasurementWithRetry ( ( ) => CpuUtilizationRequest ( cpuRequest ) ) ,
11392 unit : "1" ) ;
11493 }
11594 else
11695 {
11796 _ = meter . CreateObservableGauge (
11897 name : ResourceUtilizationInstruments . ContainerCpuLimitUtilization ,
119- observeValues : ( ) => GetMeasurementWithRetry ( ( ) => CpuUtilization ( ) * _scaleRelativeToCpuLimit ) ,
98+ observeValues : ( ) => GetMeasurementWithRetry ( ( ) => CpuUtilization ( ) * scaleRelativeToCpuLimit ) ,
12099 unit : "1" ) ;
121100
122101 _ = meter . CreateObservableGauge (
123102 name : ResourceUtilizationInstruments . ContainerCpuRequestUtilization ,
124- observeValues : ( ) => GetMeasurementWithRetry ( ( ) => CpuUtilization ( ) * _scaleRelativeToCpuRequest ) ,
103+ observeValues : ( ) => GetMeasurementWithRetry ( ( ) => CpuUtilization ( ) * scaleRelativeToCpuRequest ) ,
125104 unit : "1" ) ;
126105
127106 _ = meter . CreateObservableGauge (
128107 name : ResourceUtilizationInstruments . ProcessCpuUtilization ,
129- observeValues : ( ) => GetMeasurementWithRetry ( ( ) => CpuUtilization ( ) * _scaleRelativeToCpuRequest ) ,
108+ observeValues : ( ) => GetMeasurementWithRetry ( ( ) => CpuUtilization ( ) * scaleRelativeToCpuRequest ) ,
130109 unit : "1" ) ;
131110 }
132111
@@ -148,10 +127,9 @@ public LinuxUtilizationProvider(IOptions<ResourceMonitoringOptions> options, ILi
148127 _logger . SystemResourcesInfo ( cpuLimit , cpuRequest , _memoryLimit , _memoryLimit ) ;
149128 }
150129
151- public double CpuUtilizationWithoutHostDelta ( )
130+ public double CpuUtilizationV2 ( )
152131 {
153132 DateTimeOffset now = _timeProvider . GetUtcNow ( ) ;
154- double actualElapsedNanoseconds = ( now - _lastCpuMeasurementTime ) . TotalNanoseconds ;
155133 lock ( _cpuLocker )
156134 {
157135 if ( now < _refreshAfterCpu )
@@ -160,79 +138,34 @@ public double CpuUtilizationWithoutHostDelta()
160138 }
161139 }
162140
163- var ( cpuUsageTime , cpuPeriodCounter ) = _parser . GetCgroupCpuUsageInNanosecondsAndCpuPeriodsV2 ( ) ;
141+ ( long cpuUsageTime , long cpuPeriodCounter ) = _parser . GetCgroupCpuUsageInNanosecondsAndCpuPeriodsV2 ( ) ;
164142 lock ( _cpuLocker )
165143 {
166- if ( now >= _refreshAfterCpu )
144+ if ( now < _refreshAfterCpu )
167145 {
168- long deltaCgroup = cpuUsageTime - _previousCgroupCpuTime ;
169- double coresUsed ;
170-
171- if ( _useDeltaNrPeriods )
172- {
173- long deltaPeriodCount = cpuPeriodCounter - _previousCgroupCpuPeriodCounter ;
174- long deltaCpuPeriodInNanoseconds = deltaPeriodCount * _cpuPeriodsInterval * 1000 ;
175-
176- if ( deltaCgroup > 0 && deltaPeriodCount > 0 )
177- {
178- coresUsed = deltaCgroup / ( double ) deltaCpuPeriodInNanoseconds ;
179-
180- _logger . CpuUsageDataV2 ( cpuUsageTime , _previousCgroupCpuTime , deltaCpuPeriodInNanoseconds , coresUsed ) ;
181-
182- _lastCpuCoresUsed = coresUsed ;
183- _refreshAfterCpu = now . Add ( _cpuRefreshInterval ) ;
184- _previousCgroupCpuTime = cpuUsageTime ;
185- _previousCgroupCpuPeriodCounter = cpuPeriodCounter ;
186- }
187- }
188- else
189- {
190- if ( deltaCgroup > 0 )
191- {
192- coresUsed = deltaCgroup / actualElapsedNanoseconds ;
193-
194- _logger . CpuUsageDataV2 ( cpuUsageTime , _previousCgroupCpuTime , actualElapsedNanoseconds , coresUsed ) ;
195-
196- _lastCpuCoresUsed = coresUsed ;
197- _refreshAfterCpu = now . Add ( _cpuRefreshInterval ) ;
198- _previousCgroupCpuTime = cpuUsageTime ;
199-
200- // Update the timestamp for next calculation
201- _lastCpuMeasurementTime = now ;
202- }
203- }
146+ return _lastCpuCoresUsed ;
204147 }
205- }
206148
207- return _lastCpuCoresUsed ;
208- }
149+ long deltaCgroup = cpuUsageTime - _previousCgroupCpuTime ;
150+ long deltaPeriodCount = cpuPeriodCounter - _previousCgroupCpuPeriodCounter ;
209151
210- /// <summary>
211- /// Calculates CPU utilization relative to the CPU limit.
212- /// </summary>
213- /// <param name="cpuLimit">The CPU limit to use for the calculation.</param>
214- /// <returns>CPU usage as a ratio of the limit.</returns>
215- public double CpuUtilizationLimit ( float cpuLimit )
216- {
217- double utilization = CpuUtilizationWithoutHostDelta ( ) / cpuLimit ;
152+ if ( deltaCgroup <= 0 || deltaPeriodCount <= 0 )
153+ {
154+ return _lastCpuCoresUsed ;
155+ }
218156
219- // Increment counter if utilization exceeds 1 (100%)
220- if ( utilization > 1.0 )
221- {
222- _cpuUtilizationLimit100PercentExceededCounter ? . Add ( 1 ) ;
223- _cpuUtilizationLimit100PercentExceeded ++ ;
224- _logger . CounterMessage100 ( _cpuUtilizationLimit100PercentExceeded ) ;
225- }
157+ long deltaCpuPeriodInNanoseconds = deltaPeriodCount * _cpuPeriodsInterval * 1000 ;
158+ double coresUsed = deltaCgroup / ( double ) deltaCpuPeriodInNanoseconds ;
226159
227- // Increment counter if utilization exceeds 110%
228- if ( utilization > CpuLimitThreshold110Percent )
229- {
230- _cpuUtilizationLimit110PercentExceededCounter ? . Add ( 1 ) ;
231- _cpuUtilizationLimit110PercentExceeded ++ ;
232- _logger . CounterMessage110 ( _cpuUtilizationLimit110PercentExceeded ) ;
160+ _logger . CpuUsageDataV2 ( cpuUsageTime , _previousCgroupCpuTime , deltaCpuPeriodInNanoseconds , coresUsed ) ;
161+
162+ _lastCpuCoresUsed = coresUsed ;
163+ _refreshAfterCpu = now . Add ( _cpuRefreshInterval ) ;
164+ _previousCgroupCpuTime = cpuUsageTime ;
165+ _previousCgroupCpuPeriodCounter = cpuPeriodCounter ;
233166 }
234167
235- return utilization ;
168+ return _lastCpuCoresUsed ;
236169 }
237170
238171 public double CpuUtilization ( )
@@ -252,23 +185,27 @@ public double CpuUtilization()
252185
253186 lock ( _cpuLocker )
254187 {
255- if ( now >= _refreshAfterCpu )
188+ if ( now < _refreshAfterCpu )
256189 {
257- long deltaHost = hostCpuTime - _previousHostCpuTime ;
258- long deltaCgroup = cgroupCpuTime - _previousCgroupCpuTime ;
259-
260- if ( deltaHost > 0 && deltaCgroup > 0 )
261- {
262- double percentage = Math . Min ( One , ( double ) deltaCgroup / deltaHost ) ;
190+ return _cpuPercentage ;
191+ }
263192
264- _logger . CpuUsageData ( cgroupCpuTime , hostCpuTime , _previousCgroupCpuTime , _previousHostCpuTime , percentage ) ;
193+ long deltaHost = hostCpuTime - _previousHostCpuTime ;
194+ long deltaCgroup = cgroupCpuTime - _previousCgroupCpuTime ;
265195
266- _cpuPercentage = percentage ;
267- _refreshAfterCpu = now . Add ( _cpuRefreshInterval ) ;
268- _previousCgroupCpuTime = cgroupCpuTime ;
269- _previousHostCpuTime = hostCpuTime ;
270- }
196+ if ( deltaHost <= 0 || deltaCgroup <= 0 )
197+ {
198+ return _cpuPercentage ;
271199 }
200+
201+ double percentage = Math . Min ( One , ( double ) deltaCgroup / deltaHost ) ;
202+
203+ _logger . CpuUsageData ( cgroupCpuTime , hostCpuTime , _previousCgroupCpuTime , _previousHostCpuTime , percentage ) ;
204+
205+ _cpuPercentage = percentage ;
206+ _refreshAfterCpu = now . Add ( _cpuRefreshInterval ) ;
207+ _previousCgroupCpuTime = cgroupCpuTime ;
208+ _previousHostCpuTime = hostCpuTime ;
272209 }
273210
274211 return _cpuPercentage ;
@@ -351,4 +288,9 @@ ex is System.IO.DirectoryNotFoundException ||
351288 return Enumerable . Empty < Measurement < double > > ( ) ;
352289 }
353290 }
291+
292+ // Math.Min() caps each ratio below at One (1.0) to mitigate margin errors and various kinds of precision loss
293+ // that arise because the calculation itself is not an atomic operation:
294+ private double CpuUtilizationRequest ( double cpuRequest ) => Math . Min ( One , CpuUtilizationV2 ( ) / cpuRequest ) ;
295+ private double CpuUtilizationLimit ( double cpuLimit ) => Math . Min ( One , CpuUtilizationV2 ( ) / cpuLimit ) ;
354296}
0 commit comments