Skip to content

Commit 06a62ec

Browse files
committed
Add timeout for reading filesystem usage to exit properly with hanging filesystems
1 parent f2701ce commit 06a62ec

File tree

2 files changed

+50
-9
lines changed

2 files changed

+50
-9
lines changed

cmd/filesystem.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
package cmd
22

33
import (
4+
"context"
45
"errors"
56
"fmt"
67
"strconv"
8+
"time"
79

810
"github.com/NETWAYS/check_system_basics/internal/common/thresholds"
911
"github.com/NETWAYS/check_system_basics/internal/filesystem"
@@ -137,7 +139,12 @@ var diskCmd = &cobra.Command{
137139
}
138140

139141
// Retrieve stats
140-
err = filesystem.GetDiskUsage(filesystemList, &FsConfig)
142+
intTimeout := time.Duration(Timeout/2) * time.Second
143+
pCtx := context.Background()
144+
ctx, cancel := context.WithTimeout(pCtx, intTimeout)
145+
defer cancel()
146+
147+
err = filesystem.GetDiskUsage(ctx, filesystemList, &FsConfig)
141148
if err != nil {
142149
check.ExitError(err)
143150
}
@@ -146,7 +153,10 @@ var diskCmd = &cobra.Command{
146153
for index := range filesystemList {
147154
sc := computeFsCheckResult(&filesystemList[index], &FsConfig)
148155

149-
sc.Output = fmt.Sprintf("%s (%.2f%% used space, %.2f%% free inodes)", sc.Output, filesystemList[index].UsageStats.UsedPercent, 100-filesystemList[index].UsageStats.InodesUsedPercent)
156+
if filesystemList[index].Error == nil {
157+
sc.Output = fmt.Sprintf("%s (%.2f%% used space, %.2f%% free inodes)", sc.Output, filesystemList[index].UsageStats.UsedPercent, 100-filesystemList[index].UsageStats.InodesUsedPercent)
158+
} else {
159+
}
150160

151161
overall.AddSubcheck(sc)
152162
}

internal/filesystem/filesystem.go

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package filesystem
22

33
import (
4+
"context"
45
"errors"
56
"regexp"
67
"syscall"
@@ -15,16 +16,46 @@ type FilesystemType struct {
1516
Error error
1617
}
1718

18-
func GetDiskUsage(fsList []FilesystemType, _ *CheckConfig) error {
19-
for index := range fsList {
20-
diskUsage, err := disk.Usage(fsList[index].PartStats.Mountpoint)
19+
type tmpFileSystemWrapper struct {
20+
usage disk.UsageStat
21+
err error
22+
}
23+
24+
func GetDiskUsageSingle(ctx context.Context, fs *FilesystemType) {
25+
resChan := make(chan tmpFileSystemWrapper, 1)
26+
go func() {
27+
tmp := tmpFileSystemWrapper{}
28+
usageStats, err := disk.Usage(fs.PartStats.Mountpoint)
29+
tmp.usage = *usageStats
30+
tmp.err = err
31+
32+
resChan <- tmp
33+
}()
34+
35+
select {
36+
case tmp := <-resChan:
37+
if tmp.err != nil {
38+
if errors.Is(tmp.err, syscall.Errno(syscall.EACCES)) {
39+
// Treat Permission denied differently?
40+
// Not sure why this is tested for that specifically
41+
fs.Error = tmp.err
42+
} else {
43+
fs.Error = tmp.err
44+
}
2145

22-
if errors.Is(err, syscall.Errno(13)) {
23-
fsList[index].Error = err
24-
continue
46+
return
2547
}
2648

27-
fsList[index].UsageStats = *diskUsage
49+
fs.UsageStats = tmp.usage
50+
case <-ctx.Done():
51+
err := errors.New("Timeout exceded for fs " + fs.PartStats.Mountpoint + ". Maybe hanging network filesystem?")
52+
fs.Error = err
53+
}
54+
}
55+
56+
func GetDiskUsage(ctx context.Context, fsList []FilesystemType, _ *CheckConfig) error {
57+
for index := range fsList {
58+
GetDiskUsageSingle(ctx, &fsList[index])
2859
}
2960

3061
return nil

0 commit comments

Comments
 (0)