Skip to content

Commit 91d0e15

Browse files
committed
Wrap errors returned by Queryable to custom wrapper.
This allows us to distinguish between those errors and errors returned by PromQL engine, and react appropriately. Signed-off-by: Peter Štibraný <[email protected]>
1 parent 3f304eb commit 91d0e15

File tree

2 files changed

+48
-14
lines changed

2 files changed

+48
-14
lines changed

pkg/ruler/compat.go

Lines changed: 47 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -149,16 +149,30 @@ func MetricsQueryFunc(qf rules.QueryFunc, queries, failedQueries prometheus.Coun
149149
return func(ctx context.Context, qs string, t time.Time) (promql.Vector, error) {
150150
queries.Inc()
151151
result, err := qf(ctx, qs, t)
152-
// We rely on TranslateToPromqlApiError to do its job here... it returns nil, if err is nil.
153-
// It returns promql.ErrStorage, if error should be reported back as 500.
154-
// Other errors it returns are either for canceled or timed-out queriers (we're not reporting those as failures),
155-
// or various user-errors (limits, duplicate samples, etc. ... also not failures).
156-
//
157-
// All errors will still be counted towards "evaluation failures" metrics and logged by Prometheus Ruler,
158-
// but we only want internal errors here.
159-
if _, ok := querier.TranslateToPromqlAPIError(err).(promql.ErrStorage); ok {
160-
failedQueries.Inc()
152+
153+
// We only care about errors returned by underlying Queryable. Errors returned by PromQL engine are "user-errors",
154+
// and not interesting here.
155+
qerr := QueryableError{}
156+
if err != nil && errors.As(err, &qerr) {
157+
origErr := qerr.Unwrap()
158+
159+
// Not all errors returned by Queryable are interesting, only those that would result in 500 status code.
160+
//
161+
// We rely on TranslateToPromqlApiError to do its job here... it returns nil, if err is nil.
162+
// It returns promql.ErrStorage, if error should be reported back as 500.
163+
// Other errors it returns are either for canceled or timed-out queriers (we're not reporting those as failures),
164+
// or various user-errors (limits, duplicate samples, etc. ... also not failures).
165+
//
166+
// All errors will still be counted towards "evaluation failures" metrics and logged by Prometheus Ruler,
167+
// but we only want internal errors here.
168+
if _, ok := querier.TranslateToPromqlAPIError(origErr).(promql.ErrStorage); ok {
169+
failedQueries.Inc()
170+
}
171+
172+
// Return unwrapped error.
173+
return result, origErr
161174
}
175+
162176
return result, err
163177
}
164178
}
@@ -234,10 +248,10 @@ func DefaultTenantManagerFactory(cfg Config, p Pusher, q storage.Queryable, engi
234248
}, []string{"user"})
235249
}
236250

237-
// Make sure that queryable reports errors as expected by our MetricsQueryFunc.
238-
// Note that if queryable is already wrapped into ErrorTranslateQueryable, wrapping it again
239-
// is fine (see TestApiStatusCodesDoubleWrapper in querier package).
240-
q = querier.NewErrorTranslateQueryable(q)
251+
// Wrap errors returned by Queryable to our wrapper, so that we can distinguish between those errors
252+
// and errors returned by PromQL engine. Errors from Queryable can be either caused by user (limits) or internal errors.
253+
// Errors from PromQL are always "user" errors.
254+
q = querier.NewErrorTranslateQueryableWithFn(q, WrapQueryableErrors)
241255

242256
return func(ctx context.Context, userID string, notifier *notifier.Manager, logger log.Logger, reg prometheus.Registerer) RulesManager {
243257
var queryTime prometheus.Counter = nil
@@ -260,3 +274,23 @@ func DefaultTenantManagerFactory(cfg Config, p Pusher, q storage.Queryable, engi
260274
})
261275
}
262276
}
277+
278+
type QueryableError struct {
279+
err error
280+
}
281+
282+
func (q QueryableError) Unwrap() error {
283+
return q.err
284+
}
285+
286+
func (q QueryableError) Error() string {
287+
return q.err.Error()
288+
}
289+
290+
func WrapQueryableErrors(err error) error {
291+
if err == nil {
292+
return err
293+
}
294+
295+
return QueryableError{err: err}
296+
}

pkg/ruler/compat_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ func TestMetricsQueryFuncErrors(t *testing.T) {
229229
failures := prometheus.NewCounter(prometheus.CounterOpts{})
230230

231231
mockFunc := func(ctx context.Context, q string, t time.Time) (promql.Vector, error) {
232-
return promql.Vector{}, tc.returnedError
232+
return promql.Vector{}, WrapQueryableErrors(tc.returnedError)
233233
}
234234
qf := MetricsQueryFunc(mockFunc, queries, failures)
235235

0 commit comments

Comments
 (0)