From 02d3315f56be7e3ccf91e1e6fe4a7b3d1466339b Mon Sep 17 00:00:00 2001 From: Jing Liu Date: Fri, 22 Nov 2024 14:29:10 -0800 Subject: [PATCH 1/7] Support prometheus metrics Following metrics added: certmanager_csi_certificate_request_expiration_timestamp_seconds certmanager_csi_certificate_request_ready_status certmanager_csi_certificate_request_renewal_timestamp_seconds certmanager_csi_driver_issue_call_count certmanager_csi_driver_issue_error_count certmanager_csi_managed_certificate_count certmanager_csi_managed_volume_count fixes: #60 Signed-off-by: Jing Liu --- go.mod | 7 +- go.sum | 24 ++ manager/manager.go | 53 ++++- manager/manager_test.go | 17 +- metrics/certificaterequest.go | 102 ++++++++ metrics/certificaterequest_test.go | 358 +++++++++++++++++++++++++++++ metrics/metrics.go | 192 ++++++++++++++++ test/driver/driver_testing.go | 3 + test/integration/metrics_test.go | 235 +++++++++++++++++++ test/util/testutil.go | 6 +- 10 files changed, 979 insertions(+), 18 deletions(-) create mode 100644 metrics/certificaterequest.go create mode 100644 metrics/certificaterequest_test.go create mode 100644 metrics/metrics.go create mode 100644 test/integration/metrics_test.go diff --git a/go.mod b/go.mod index 2067607..a263af8 100644 --- a/go.mod +++ b/go.mod @@ -7,6 +7,7 @@ require ( github.com/container-storage-interface/spec v1.11.0 github.com/go-logr/logr v1.4.3 github.com/kubernetes-csi/csi-lib-utils v0.22.0 + github.com/prometheus/client_golang v1.23.2 github.com/stretchr/testify v1.11.1 google.golang.org/grpc v1.76.0 k8s.io/apimachinery v0.34.1 @@ -17,12 +18,15 @@ require ( ) require ( + github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/blang/semver/v4 v4.0.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/emicklei/go-restful/v3 v3.13.0 // indirect github.com/fxamacker/cbor/v2 v2.9.0 // 
indirect + github.com/go-asn1-ber/asn1-ber v1.5.8-0.20250403174932-29230038a667 // indirect + github.com/go-ldap/ldap/v3 v3.4.12 // indirect github.com/go-logr/zapr v1.3.0 // indirect github.com/go-openapi/jsonpointer v0.22.1 // indirect github.com/go-openapi/jsonreference v0.21.2 // indirect @@ -35,13 +39,13 @@ require ( github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect + github.com/kylelemons/godebug v1.1.0 // indirect github.com/mailru/easyjson v0.9.0 // indirect github.com/moby/sys/mountinfo v0.7.2 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect - github.com/prometheus/client_golang v1.23.2 // indirect github.com/prometheus/client_model v0.6.2 // indirect github.com/prometheus/common v0.66.1 // indirect github.com/prometheus/procfs v0.17.0 // indirect @@ -54,6 +58,7 @@ require ( go.uber.org/zap v1.27.0 // indirect go.yaml.in/yaml/v2 v2.4.2 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect + golang.org/x/crypto v0.42.0 // indirect golang.org/x/net v0.44.0 // indirect golang.org/x/oauth2 v0.31.0 // indirect golang.org/x/sys v0.36.0 // indirect diff --git a/go.sum b/go.sum index 28563c0..496c929 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,7 @@ +github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 h1:mFRzDkZVAjdal+s7s0MwaRv9igoPqLRdzOLzw/8Xvq8= +github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU= +github.com/alexbrainman/sspi v0.0.0-20250919150558-7d374ff0d59e h1:4dAU9FXIyQktpoUAgOJK3OTFc/xug0PCXYCqU0FgDKI= +github.com/alexbrainman/sspi v0.0.0-20250919150558-7d374ff0d59e/go.mod 
h1:cEWa1LVoE5KvSD9ONXsZrj0z6KqySlCCNKHlLzbqAt4= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= @@ -17,6 +21,10 @@ github.com/emicklei/go-restful/v3 v3.13.0 h1:C4Bl2xDndpU6nJ4bc1jXd+uTmYPVUwkD6bF github.com/emicklei/go-restful/v3 v3.13.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM= github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= +github.com/go-asn1-ber/asn1-ber v1.5.8-0.20250403174932-29230038a667 h1:BP4M0CvQ4S3TGls2FvczZtj5Re/2ZzkV9VwqPHH/3Bo= +github.com/go-asn1-ber/asn1-ber v1.5.8-0.20250403174932-29230038a667/go.mod h1:hEBeB/ic+5LoWskz+yKT7vGhhPYkProFKoKdwZRWMe0= +github.com/go-ldap/ldap/v3 v3.4.12 h1:1b81mv7MagXZ7+1r7cLTWmyuTqVqdwbtJSjC0DAp9s4= +github.com/go-ldap/ldap/v3 v3.4.12/go.mod h1:+SPAGcTtOfmGsCb3h1RFiq4xpp4N636G75OEace8lNo= github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= @@ -46,8 +54,22 @@ github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db h1:097atOisP2aRj7vFgY github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/hashicorp/go-uuid v1.0.3 h1:2gKiV6YVmrJ1i2CKKa9obLvRieoRGviZFL26PcT/Co8= +github.com/hashicorp/go-uuid v1.0.3/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= 
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/jcmturner/aescts/v2 v2.0.0 h1:9YKLH6ey7H4eDBXW8khjYslgyqG2xZikXP0EQFKrle8= +github.com/jcmturner/aescts/v2 v2.0.0/go.mod h1:AiaICIRyfYg35RUkr8yESTqvSy7csK90qZ5xfvvsoNs= +github.com/jcmturner/dnsutils/v2 v2.0.0 h1:lltnkeZGL0wILNvrNiVCR6Ro5PGU/SeBvVO/8c/iPbo= +github.com/jcmturner/dnsutils/v2 v2.0.0/go.mod h1:b0TnjGOvI/n42bZa+hmXL+kFJZsFT7G4t3HTlQ184QM= +github.com/jcmturner/gofork v1.7.6 h1:QH0l3hzAU1tfT3rZCnW5zXl+orbkNMMRGJfdJjHVETg= +github.com/jcmturner/gofork v1.7.6/go.mod h1:1622LH6i/EZqLloHfE7IeZ0uEJwMSUyQ/nDd82IeqRo= +github.com/jcmturner/goidentity/v6 v6.0.1 h1:VKnZd2oEIMorCTsFBnJWbExfNN7yZr3EhJAxwOkZg6o= +github.com/jcmturner/goidentity/v6 v6.0.1/go.mod h1:X1YW3bgtvwAXju7V3LCIMpY0Gbxyjn/mY9zx4tFonSg= +github.com/jcmturner/gokrb5/v8 v8.4.4 h1:x1Sv4HaTpepFkXbt2IkL29DXRf8sOfZXo8eRKh687T8= +github.com/jcmturner/gokrb5/v8 v8.4.4/go.mod h1:1btQEpgT6k+unzCwX1KdWMEwPPkkgBtP+F6aCACiMrs= +github.com/jcmturner/rpc/v2 v2.0.3 h1:7FXXj8Ti1IaVFpSAziCZWNzbNuZmnvw/i6CqLNdWfZY= +github.com/jcmturner/rpc/v2 v2.0.3/go.mod h1:VUJYCIDm3PVOEHw8sgt091/20OJjskO/YJki3ELg/Hc= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= @@ -134,6 +156,8 @@ go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.42.0 h1:chiH31gIWm57EkTXpwnqf8qeuMUi0yekh6mT2AvFlqI= +golang.org/x/crypto v0.42.0/go.mod 
h1:4+rDnOTJhQCx2q7/j6rAN5XDw8kPjeaXEUR2eL94ix8= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= diff --git a/manager/manager.go b/manager/manager.go index 869693d..c8c583e 100644 --- a/manager/manager.go +++ b/manager/manager.go @@ -47,6 +47,7 @@ import ( internalapi "github.com/cert-manager/csi-lib/internal/api" internalapiutil "github.com/cert-manager/csi-lib/internal/api/util" "github.com/cert-manager/csi-lib/metadata" + "github.com/cert-manager/csi-lib/metrics" "github.com/cert-manager/csi-lib/storage" ) @@ -89,6 +90,9 @@ type Options struct { // RenewalBackoffConfig configures the exponential backoff applied to certificate renewal failures. RenewalBackoffConfig *wait.Backoff + + // Metrics is used for exposing Prometheus metrics + Metrics *metrics.Metrics } // NewManager constructs a new manager used to manage volumes containing @@ -126,6 +130,9 @@ func NewManager(opts Options) (*Manager, error) { if opts.Log == nil { return nil, errors.New("log must be set") } + if opts.Metrics == nil { + opts.Metrics = metrics.New(opts.Log) + } if opts.MetadataReader == nil { return nil, errors.New("MetadataReader must be set") } @@ -241,6 +248,7 @@ func NewManager(opts Options) (*Manager, error) { metadataReader: opts.MetadataReader, clock: opts.Clock, log: *opts.Log, + metrics: opts.Metrics, generatePrivateKey: opts.GeneratePrivateKey, generateRequest: opts.GenerateRequest, @@ -375,6 +383,9 @@ type Manager struct { // No thread safety is added around this field, and it MUST NOT be used for any implementation logic. // It should not be used full-stop :). doNotUse_CallOnEachIssue func() + + // metrics is used to expose Prometheus + metrics *metrics.Metrics } // issue will step through the entire issuance flow for a volume. 
@@ -387,6 +398,9 @@ func (m *Manager) issue(ctx context.Context, volumeID string) error { log := m.log.WithValues("volume_id", volumeID) log.Info("Processing issuance") + // Increase issue count + m.metrics.IncrementIssueCallCount(m.nodeNameHash, volumeID) + if err := m.cleanupStaleRequests(ctx, log, volumeID); err != nil { return fmt.Errorf("cleaning up stale requests: %w", err) } @@ -594,7 +608,7 @@ func (m *Manager) handleRequest(ctx context.Context, volumeID string, meta metad // Calculate the default next issuance time. // The implementation's writeKeypair function may override this value before // writing to the storage layer. - renewalPoint, err := calculateNextIssuanceTime(req.Status.Certificate) + expiryPoint, renewalPoint, err := getExpiryAndDefaultNextIssuanceTime(req.Status.Certificate) if err != nil { return fmt.Errorf("calculating next issuance time: %w", err) } @@ -606,6 +620,10 @@ func (m *Manager) handleRequest(ctx context.Context, volumeID string, meta metad } log.V(2).Info("Wrote new keypair to storage") + // Update the request metrics. + // Using meta.NextIssuanceTime instead of renewalPoint here, in case writeKeypair overrides the value. + m.metrics.UpdateCertificateRequest(req, expiryPoint, *meta.NextIssuanceTime) + // We must explicitly delete the private key from the pending requests map so that the existing Completed // request will not be re-used upon renewal. // Without this, the renewal would pick up the existing issued certificate and re-issue, rather than requesting @@ -657,6 +675,9 @@ func (m *Manager) cleanupStaleRequests(ctx context.Context, log logr.Logger, vol } } + // Remove the CertificateRequest from the metrics. 
+ m.metrics.RemoveCertificateRequest(toDelete.Name, toDelete.Namespace) + log.Info("Deleted CertificateRequest resource", "name", toDelete.Name, "namespace", toDelete.Namespace) } @@ -756,6 +777,8 @@ func (m *Manager) ManageVolumeImmediate(ctx context.Context, volumeID string) (m // If issuance fails, immediately return without retrying so the caller can decide // how to proceed depending on the context this method was called within. if err := m.issue(ctx, volumeID); err != nil { + // Increase issue error count + m.metrics.IncrementIssueErrorCount(m.nodeNameHash, volumeID) return true, err } } @@ -783,6 +806,8 @@ func (m *Manager) manageVolumeIfNotManaged(volumeID string) (managed bool) { // construct a new channel used to stop management of the volume stopCh := make(chan struct{}) m.managedVolumes[volumeID] = stopCh + // Increase managed volume count for this driver + m.metrics.IncrementManagedVolumeCount(m.nodeNameHash) return true } @@ -800,6 +825,10 @@ func (m *Manager) startRenewalRoutine(volumeID string) (started bool) { return false } + // Increase managed certificate count for this driver. + // We assume each volume will have one certificate to be managed. 
+ m.metrics.IncrementManagedCertificateCount(m.nodeNameHash) + // Create a context that will be cancelled when the stopCh is closed ctx, cancel := context.WithCancel(context.Background()) go func() { @@ -835,6 +864,8 @@ func (m *Manager) startRenewalRoutine(volumeID string) (started bool) { defer issueCancel() if err := m.issue(issueCtx, volumeID); err != nil { log.Error(err, "Failed to issue certificate, retrying after applying exponential backoff") + // Increase issue error count + m.metrics.IncrementIssueErrorCount(m.nodeNameHash, volumeID) return false, nil } return true, nil @@ -874,6 +905,14 @@ func (m *Manager) UnmanageVolume(volumeID string) { if stopCh, ok := m.managedVolumes[volumeID]; ok { close(stopCh) delete(m.managedVolumes, volumeID) + if reqs, err := m.listAllRequestsForVolume(volumeID); err == nil { + // Best-effort removal of this volume's CertificateRequests from the metrics; a failure to list them is deliberately ignored. + for _, req := range reqs { + if req != nil { + m.metrics.RemoveCertificateRequest(req.Name, req.Namespace) + } + } + } } } @@ -919,19 +958,19 @@ func (m *Manager) Stop() { } } -// calculateNextIssuanceTime will return the default time at which the certificate -// should be renewed by the driver- 2/3rds through its lifetime (NotAfter - -// NotBefore). -func calculateNextIssuanceTime(chain []byte) (time.Time, error) { +// getExpiryAndDefaultNextIssuanceTime returns the certificate's expiry time (NotAfter), together with +// the default time at which the certificate should be renewed by the driver: 2/3rds through its +// lifetime (NotAfter - NotBefore).
+func getExpiryAndDefaultNextIssuanceTime(chain []byte) (time.Time, time.Time, error) { block, _ := pem.Decode(chain) crt, err := x509.ParseCertificate(block.Bytes) if err != nil { - return time.Time{}, fmt.Errorf("parsing issued certificate: %w", err) + return time.Time{}, time.Time{}, fmt.Errorf("parsing issued certificate: %w", err) } actualDuration := crt.NotAfter.Sub(crt.NotBefore) renewBeforeNotAfter := actualDuration / 3 - return crt.NotAfter.Add(-renewBeforeNotAfter), nil + return crt.NotAfter, crt.NotAfter.Add(-renewBeforeNotAfter), nil } diff --git a/manager/manager_test.go b/manager/manager_test.go index c350be3..21bea15 100644 --- a/manager/manager_test.go +++ b/manager/manager_test.go @@ -470,7 +470,7 @@ func TestManager_cleanupStaleRequests(t *testing.T) { } } -func Test_calculateNextIssuanceTime(t *testing.T) { +func Test_getExpiryAndDefaultNextIssuanceTime(t *testing.T) { notBefore := time.Date(1970, time.January, 1, 0, 0, 0, 0, time.UTC) notAfter := time.Date(1970, time.January, 4, 0, 0, 0, 0, time.UTC) pk, err := rsa.GenerateKey(rand.Reader, 2048) @@ -490,20 +490,23 @@ func Test_calculateNextIssuanceTime(t *testing.T) { certPEM := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: derBytes}) tests := map[string]struct { - expTime time.Time - expErr bool + expTime time.Time + renewTime time.Time + expErr bool }{ "if no attributes given, return 2/3rd certificate lifetime": { - expTime: notBefore.AddDate(0, 0, 2), - expErr: false, + expTime: notAfter, + renewTime: notBefore.AddDate(0, 0, 2), + expErr: false, }, } for name, test := range tests { t.Run(name, func(t *testing.T) { - renewTime, err := calculateNextIssuanceTime(certPEM) + expTime, renewTime, err := getExpiryAndDefaultNextIssuanceTime(certPEM) assert.Equal(t, test.expErr, err != nil) - assert.Equal(t, test.expTime, renewTime) + assert.Equal(t, test.expTime, expTime) + assert.Equal(t, test.renewTime, renewTime) }) } } diff --git a/metrics/certificaterequest.go 
b/metrics/certificaterequest.go new file mode 100644 index 0000000..40a91fc --- /dev/null +++ b/metrics/certificaterequest.go @@ -0,0 +1,102 @@ +/* +Copyright 2024 The cert-manager Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import ( + "time" + + "github.com/prometheus/client_golang/prometheus" + + cmapi "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1" + cmmeta "github.com/cert-manager/cert-manager/pkg/apis/meta/v1" +) + +var readyConditionStatuses = [...]cmmeta.ConditionStatus{ + cmmeta.ConditionTrue, + cmmeta.ConditionFalse, + cmmeta.ConditionUnknown, +} + +// UpdateCertificateRequest will update the given CertificateRequest's metrics for its expiry, renewal, and status condition. 
+func (m *Metrics) UpdateCertificateRequest(cr *cmapi.CertificateRequest, exp, renewal time.Time) { + m.updateCertificateRequestExpiryAndRenewalTime(cr, exp, renewal) + m.updateCertificateRequestStatus(cr) +} + +// updateCertificateRequestExpiryAndRenewalTime updates the expiry and renewal time of a certificate request +func (m *Metrics) updateCertificateRequestExpiryAndRenewalTime(cr *cmapi.CertificateRequest, exp, renewal time.Time) { + expiryTime := 0.0 + if !exp.IsZero() { + expiryTime = float64(exp.Unix()) + } + m.certificateRequestExpiryTimeSeconds.With(prometheus.Labels{ + "name": cr.Name, + "namespace": cr.Namespace, + "issuer_name": cr.Spec.IssuerRef.Name, + "issuer_kind": cr.Spec.IssuerRef.Kind, + "issuer_group": cr.Spec.IssuerRef.Group}).Set(expiryTime) + + renewalTime := 0.0 + if !renewal.IsZero() { + renewalTime = float64(renewal.Unix()) + } + m.certificateRequestRenewalTimeSeconds.With(prometheus.Labels{ + "name": cr.Name, + "namespace": cr.Namespace, + "issuer_name": cr.Spec.IssuerRef.Name, + "issuer_kind": cr.Spec.IssuerRef.Kind, + "issuer_group": cr.Spec.IssuerRef.Group}).Set(renewalTime) +} + +// updateCertificateRequestStatus will update the metric for that Certificate Request +func (m *Metrics) updateCertificateRequestStatus(cr *cmapi.CertificateRequest) { + for _, c := range cr.Status.Conditions { + if c.Type == cmapi.CertificateRequestConditionReady { + m.updateCertificateRequestReadyStatus(cr, c.Status) + return + } + } + + // If no status condition set yet, set to Unknown + m.updateCertificateRequestReadyStatus(cr, cmmeta.ConditionUnknown) +} + +func (m *Metrics) updateCertificateRequestReadyStatus(cr *cmapi.CertificateRequest, current cmmeta.ConditionStatus) { + for _, condition := range readyConditionStatuses { + value := 0.0 + + if current == condition { + value = 1.0 + } + + m.certificateRequestReadyStatus.With(prometheus.Labels{ + "name": cr.Name, + "namespace": cr.Namespace, + "condition": string(condition), + "issuer_name": 
cr.Spec.IssuerRef.Name, + "issuer_kind": cr.Spec.IssuerRef.Kind, + "issuer_group": cr.Spec.IssuerRef.Group, + }).Set(value) + } +} + +// RemoveCertificateRequest will delete the CertificateRequest metrics from continuing to be exposed. +func (m *Metrics) RemoveCertificateRequest(name, namespace string) { + m.certificateRequestExpiryTimeSeconds.DeletePartialMatch(prometheus.Labels{"name": name, "namespace": namespace}) + m.certificateRequestRenewalTimeSeconds.DeletePartialMatch(prometheus.Labels{"name": name, "namespace": namespace}) + m.certificateRequestReadyStatus.DeletePartialMatch(prometheus.Labels{"name": name, "namespace": namespace}) +} diff --git a/metrics/certificaterequest_test.go b/metrics/certificaterequest_test.go new file mode 100644 index 0000000..201b461 --- /dev/null +++ b/metrics/certificaterequest_test.go @@ -0,0 +1,358 @@ +/* +Copyright 2024 The cert-manager Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package metrics + +import ( + "strings" + "testing" + "time" + + "github.com/go-logr/logr/testr" + "github.com/prometheus/client_golang/prometheus/testutil" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + cmapi "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1" + cmmeta "github.com/cert-manager/cert-manager/pkg/apis/meta/v1" + testcrypto "github.com/cert-manager/cert-manager/test/unit/crypto" + "github.com/cert-manager/cert-manager/test/unit/gen" +) + +const expiryMetadata = ` + # HELP certmanager_csi_certificate_request_expiration_timestamp_seconds The date after which the certificate request expires. Expressed as a Unix Epoch Time. + # TYPE certmanager_csi_certificate_request_expiration_timestamp_seconds gauge +` + +const renewalTimeMetadata = ` + # HELP certmanager_csi_certificate_request_renewal_timestamp_seconds The number of seconds before expiration time the certificate request should renew. + # TYPE certmanager_csi_certificate_request_renewal_timestamp_seconds gauge +` + +const readyMetadata = ` + # HELP certmanager_csi_certificate_request_ready_status The ready status of the certificate request. 
+ # TYPE certmanager_csi_certificate_request_ready_status gauge +` + +func TestCertificateRequestMetrics(t *testing.T) { + type testT struct { + cr *cmapi.CertificateRequest + notAfter, renewBefore time.Time + expectedExpiry, expectedReady, expectedRenewalTime string + } + tests := map[string]testT{ + "certificate with expiry and ready status": { + cr: gen.CertificateRequest("test-certificate-request", + gen.SetCertificateRequestNamespace("test-ns"), + gen.SetCertificateRequestIssuer(cmmeta.ObjectReference{ + Name: "test-issuer", + Kind: "test-issuer-kind", + Group: "test-issuer-group", + }), + gen.SetCertificateRequestStatusCondition(cmapi.CertificateRequestCondition{ + Type: cmapi.CertificateRequestConditionReady, + Status: cmmeta.ConditionTrue, + }), + ), + notAfter: time.Unix(2208988804, 0), + + expectedExpiry: ` + certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 2.208988804e+09 +`, + expectedReady: ` + certmanager_csi_certificate_request_ready_status{condition="False",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 0 + certmanager_csi_certificate_request_ready_status{condition="True",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 1 + certmanager_csi_certificate_request_ready_status{condition="Unknown",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 0 +`, + expectedRenewalTime: ` + certmanager_csi_certificate_request_renewal_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 0 +`, + }, + "certificate with no expiry 
and no status should give an expiry of 0 and Unknown status": { + cr: gen.CertificateRequest("test-certificate-request", + gen.SetCertificateRequestNamespace("test-ns"), + gen.SetCertificateRequestIssuer(cmmeta.ObjectReference{ + Name: "test-issuer", + Kind: "test-issuer-kind", + Group: "test-issuer-group", + }), + ), + + expectedExpiry: ` + certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 0 +`, + expectedReady: ` + certmanager_csi_certificate_request_ready_status{condition="False",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 0 + certmanager_csi_certificate_request_ready_status{condition="True",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 0 + certmanager_csi_certificate_request_ready_status{condition="Unknown",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 1 +`, + expectedRenewalTime: ` + certmanager_csi_certificate_request_renewal_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 0 +`, + }, + "certificate with expiry and status False should give an expiry and False status": { + cr: gen.CertificateRequest("test-certificate-request", + gen.SetCertificateRequestNamespace("test-ns"), + gen.SetCertificateRequestIssuer(cmmeta.ObjectReference{ + Name: "test-issuer", + Kind: "test-issuer-kind", + Group: "test-issuer-group", + }), + gen.SetCertificateRequestStatusCondition(cmapi.CertificateRequestCondition{ + Type: cmapi.CertificateRequestConditionReady, + Status: cmmeta.ConditionFalse, + }), + ), + notAfter: 
time.Unix(100, 0), + + expectedExpiry: ` + certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 100 +`, + expectedReady: ` + certmanager_csi_certificate_request_ready_status{condition="False",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 1 + certmanager_csi_certificate_request_ready_status{condition="True",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 0 + certmanager_csi_certificate_request_ready_status{condition="Unknown",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 0 +`, + expectedRenewalTime: ` + certmanager_csi_certificate_request_renewal_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 0 +`, + }, + "certificate with expiry and status Unknown should give an expiry and Unknown status": { + cr: gen.CertificateRequest("test-certificate-request", + gen.SetCertificateRequestNamespace("test-ns"), + gen.SetCertificateRequestIssuer(cmmeta.ObjectReference{ + Name: "test-issuer", + Kind: "test-issuer-kind", + Group: "test-issuer-group", + }), + gen.SetCertificateRequestStatusCondition(cmapi.CertificateRequestCondition{ + Type: cmapi.CertificateRequestConditionReady, + Status: cmmeta.ConditionUnknown, + }), + ), + notAfter: time.Unix(99999, 0), + + expectedExpiry: ` + certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 99999 +`, + expectedReady: ` + 
certmanager_csi_certificate_request_ready_status{condition="False",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 0 + certmanager_csi_certificate_request_ready_status{condition="True",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 0 + certmanager_csi_certificate_request_ready_status{condition="Unknown",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 1 +`, + expectedRenewalTime: ` + certmanager_csi_certificate_request_renewal_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 0 +`, + }, + "certificate with expiry and ready status and renew before": { + cr: gen.CertificateRequest("test-certificate-request", + gen.SetCertificateRequestNamespace("test-ns"), + gen.SetCertificateRequestIssuer(cmmeta.ObjectReference{ + Name: "test-issuer", + Kind: "test-issuer-kind", + Group: "test-issuer-group", + }), + gen.SetCertificateRequestStatusCondition(cmapi.CertificateRequestCondition{ + Type: cmapi.CertificateRequestConditionReady, + Status: cmmeta.ConditionTrue, + }), + ), + notAfter: time.Unix(2208988804, 0), + renewBefore: time.Unix(2108988804, 0), + + expectedExpiry: ` + certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 2.208988804e+09 +`, + expectedReady: ` + certmanager_csi_certificate_request_ready_status{condition="False",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 0 + 
certmanager_csi_certificate_request_ready_status{condition="True",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 1 + certmanager_csi_certificate_request_ready_status{condition="Unknown",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 0 +`, + expectedRenewalTime: ` + certmanager_csi_certificate_request_renewal_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 2.108988804e+09 +`, + }, + } + for n, test := range tests { + t.Run(n, func(t *testing.T) { + testLog := testr.New(t) + m := New(&testLog) + m.UpdateCertificateRequest(test.cr, test.notAfter, test.renewBefore) + + if err := testutil.CollectAndCompare(m.certificateRequestExpiryTimeSeconds, + strings.NewReader(expiryMetadata+test.expectedExpiry), + "certmanager_csi_certificate_request_expiration_timestamp_seconds", + ); err != nil { + t.Errorf("unexpected collecting result:\n%s", err) + } + + if err := testutil.CollectAndCompare(m.certificateRequestRenewalTimeSeconds, + strings.NewReader(renewalTimeMetadata+test.expectedRenewalTime), + "certmanager_csi_certificate_request_renewal_timestamp_seconds", + ); err != nil { + t.Errorf("unexpected collecting result:\n%s", err) + } + + if err := testutil.CollectAndCompare(m.certificateRequestReadyStatus, + strings.NewReader(readyMetadata+test.expectedReady), + "certmanager_csi_certificate_request_ready_status", + ); err != nil { + t.Errorf("unexpected collecting result:\n%s", err) + } + }) + } +} + +func TestCertificateRequestCache(t *testing.T) { + testLog := testr.New(t) + m := New(&testLog) + + // private key to be used to generate X509 certificate + privKey := testcrypto.MustCreatePEMPrivateKey(t) + certTemplate := &cmapi.Certificate{ + ObjectMeta: 
metav1.ObjectMeta{Namespace: "testns", Name: "test"}, + Spec: cmapi.CertificateSpec{ + CommonName: "test.example.com", + }, + } + notBefore := time.Unix(0, 0) + notAfter1, notAfter2, notAfter3 := + time.Unix(100, 0), time.Unix(200, 0), time.Unix(300, 0) + renew1, renew2, renew3 := + time.Unix(50, 0), time.Unix(150, 0), time.Unix(250, 0) + + cr1 := gen.CertificateRequest("cr1", + gen.SetCertificateRequestNamespace("testns"), + gen.SetCertificateRequestIssuer(cmmeta.ObjectReference{ + Name: "test-issuer", + Kind: "test-issuer-kind", + Group: "test-issuer-group", + }), + gen.SetCertificateRequestStatusCondition(cmapi.CertificateRequestCondition{ + Type: cmapi.CertificateRequestConditionReady, + Status: cmmeta.ConditionUnknown, + }), + gen.SetCertificateRequestCertificate( + testcrypto.MustCreateCertWithNotBeforeAfter(t, privKey, certTemplate, notBefore, notAfter1)), + ) + cr2 := gen.CertificateRequest("cr2", + gen.SetCertificateRequestNamespace("testns"), + gen.SetCertificateRequestIssuer(cmmeta.ObjectReference{ + Name: "test-issuer", + Kind: "test-issuer-kind", + Group: "test-issuer-group", + }), + gen.SetCertificateRequestStatusCondition(cmapi.CertificateRequestCondition{ + Type: cmapi.CertificateRequestConditionReady, + Status: cmmeta.ConditionTrue, + }), + gen.SetCertificateRequestCertificate( + testcrypto.MustCreateCertWithNotBeforeAfter(t, privKey, certTemplate, notBefore, notAfter2)), + ) + cr3 := gen.CertificateRequest("cr3", + gen.SetCertificateRequestNamespace("testns"), + gen.SetCertificateRequestIssuer(cmmeta.ObjectReference{ + Name: "test-issuer", + Kind: "test-issuer-kind", + Group: "test-issuer-group", + }), + gen.SetCertificateRequestStatusCondition(cmapi.CertificateRequestCondition{ + Type: cmapi.CertificateRequestConditionReady, + Status: cmmeta.ConditionFalse, + }), + gen.SetCertificateRequestCertificate( + testcrypto.MustCreateCertWithNotBeforeAfter(t, privKey, certTemplate, notBefore, notAfter3)), + ) + + // Observe all three Certificate metrics + 
m.UpdateCertificateRequest(cr1, notAfter1, renew1) + m.UpdateCertificateRequest(cr2, notAfter2, renew2) + m.UpdateCertificateRequest(cr3, notAfter3, renew3) + + // Check all three metrics exist + if err := testutil.CollectAndCompare(m.certificateRequestReadyStatus, + strings.NewReader(readyMetadata+` + certmanager_csi_certificate_request_ready_status{condition="False",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr1",namespace="testns"} 0 + certmanager_csi_certificate_request_ready_status{condition="False",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr2",namespace="testns"} 0 + certmanager_csi_certificate_request_ready_status{condition="False",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr3",namespace="testns"} 1 + certmanager_csi_certificate_request_ready_status{condition="True",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr1",namespace="testns"} 0 + certmanager_csi_certificate_request_ready_status{condition="True",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr2",namespace="testns"} 1 + certmanager_csi_certificate_request_ready_status{condition="True",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr3",namespace="testns"} 0 + certmanager_csi_certificate_request_ready_status{condition="Unknown",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr1",namespace="testns"} 1 + certmanager_csi_certificate_request_ready_status{condition="Unknown",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr2",namespace="testns"} 0 + 
certmanager_csi_certificate_request_ready_status{condition="Unknown",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr3",namespace="testns"} 0 +`), + "certmanager_csi_certificate_request_ready_status", + ); err != nil { + t.Errorf("unexpected collecting result:\n%s", err) + } + if err := testutil.CollectAndCompare(m.certificateRequestExpiryTimeSeconds, + strings.NewReader(expiryMetadata+` + certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr1",namespace="testns"} 100 + certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr2",namespace="testns"} 200 + certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr3",namespace="testns"} 300 +`), + "certmanager_csi_certificate_request_expiration_timestamp_seconds", + ); err != nil { + t.Errorf("unexpected collecting result:\n%s", err) + } + + if err := testutil.CollectAndCompare(m.certificateRequestRenewalTimeSeconds, + strings.NewReader(renewalTimeMetadata+` + certmanager_csi_certificate_request_renewal_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr1",namespace="testns"} 50 + certmanager_csi_certificate_request_renewal_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr2",namespace="testns"} 150 + certmanager_csi_certificate_request_renewal_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr3",namespace="testns"} 250 +`), + "certmanager_csi_certificate_request_renewal_timestamp_seconds", + ); err != nil { + t.Errorf("unexpected collecting result:\n%s", err) 
+ } + + // Remove second certificate and check not exists + m.RemoveCertificateRequest("cr2", "testns") + if err := testutil.CollectAndCompare(m.certificateRequestReadyStatus, + strings.NewReader(readyMetadata+` + certmanager_csi_certificate_request_ready_status{condition="False",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr1",namespace="testns"} 0 + certmanager_csi_certificate_request_ready_status{condition="False",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr3",namespace="testns"} 1 + certmanager_csi_certificate_request_ready_status{condition="True",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr1",namespace="testns"} 0 + certmanager_csi_certificate_request_ready_status{condition="True",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr3",namespace="testns"} 0 + certmanager_csi_certificate_request_ready_status{condition="Unknown",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr1",namespace="testns"} 1 + certmanager_csi_certificate_request_ready_status{condition="Unknown",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr3",namespace="testns"} 0 +`), + "certmanager_csi_certificate_request_ready_status", + ); err != nil { + t.Errorf("unexpected collecting result:\n%s", err) + } + if err := testutil.CollectAndCompare(m.certificateRequestExpiryTimeSeconds, + strings.NewReader(expiryMetadata+` + certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr1",namespace="testns"} 100 + certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr3",namespace="testns"} 300 
+`), + "certmanager_csi_certificate_request_expiration_timestamp_seconds", + ); err != nil { + t.Errorf("unexpected collecting result:\n%s", err) + } + + // Remove all Certificates (even is already removed) and observe no Certificates + m.RemoveCertificateRequest("cr1", "testns") + m.RemoveCertificateRequest("cr2", "testns") + m.RemoveCertificateRequest("cr3", "testns") + if testutil.CollectAndCount(m.certificateRequestReadyStatus, "certmanager_csi_certificate_request_ready_status") != 0 { + t.Errorf("unexpected collecting result") + } + if testutil.CollectAndCount(m.certificateRequestExpiryTimeSeconds, "certmanager_csi_certificate_request_expiration_timestamp_seconds") != 0 { + t.Errorf("unexpected collecting result") + } +} diff --git a/metrics/metrics.go b/metrics/metrics.go new file mode 100644 index 0000000..e2f0b06 --- /dev/null +++ b/metrics/metrics.go @@ -0,0 +1,192 @@ +/* +Copyright 2024 The cert-manager Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package metrics + +import ( + "net" + "net/http" + "time" + + "github.com/go-logr/logr" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/collectors" + "github.com/prometheus/client_golang/prometheus/promhttp" +) + +const ( + // Namespace is the namespace for csi-lib metric names + namespace = "certmanager" + subsystem = "csi" + prometheusMetricsServerReadTimeout = 8 * time.Second + prometheusMetricsServerWriteTimeout = 8 * time.Second + prometheusMetricsServerMaxHeaderBytes = 1 << 20 // 1 MiB +) + +// Metrics is designed to be a shared object for updating the metrics exposed by csi-lib +type Metrics struct { + log logr.Logger + registry *prometheus.Registry + + certificateRequestExpiryTimeSeconds *prometheus.GaugeVec + certificateRequestRenewalTimeSeconds *prometheus.GaugeVec + certificateRequestReadyStatus *prometheus.GaugeVec + driverIssueCallCount *prometheus.CounterVec + driverIssueErrorCount *prometheus.CounterVec + managedVolumeCount *prometheus.CounterVec + managedCertificateCount *prometheus.CounterVec +} + +// New creates a Metrics struct and populates it with prometheus metric types. +func New(logger *logr.Logger) *Metrics { + var ( + certificateRequestExpiryTimeSeconds = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "certificate_request_expiration_timestamp_seconds", + Help: "The date after which the certificate request expires. 
Expressed as a Unix Epoch Time.", + }, + []string{"name", "namespace", "issuer_name", "issuer_kind", "issuer_group"}, + ) + + certificateRequestRenewalTimeSeconds = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "certificate_request_renewal_timestamp_seconds", + Help: "The number of seconds before expiration time the certificate request should renew.", + }, + []string{"name", "namespace", "issuer_name", "issuer_kind", "issuer_group"}, + ) + + certificateRequestReadyStatus = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "certificate_request_ready_status", + Help: "The ready status of the certificate request.", + }, + []string{"name", "namespace", "condition", "issuer_name", "issuer_kind", "issuer_group"}, + ) + + driverIssueCallCount = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "driver_issue_call_count", + Help: "The number of issue() calls made by the driver.", + }, + []string{"node", "volume"}, + ) + + driverIssueErrorCount = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "driver_issue_error_count", + Help: "The number of errors encountered during the driver issue() calls.", + }, + []string{"node", "volume"}, + ) + + managedVolumeCount = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "managed_volume_count", + Help: "The number of volume managed by the csi driver.", + }, + []string{"node"}, + ) + + managedCertificateCount = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "managed_certificate_count", + Help: "The number of certificates managed by the csi driver.", + }, + []string{"node"}, + ) + ) + + // Create Registry and register the recommended collectors + registry := prometheus.NewRegistry() + 
registry.MustRegister( + collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}), + collectors.NewGoCollector(), + ) + // Create server and register Prometheus metrics handler + m := &Metrics{ + log: logger.WithName("metrics"), + registry: registry, + + certificateRequestExpiryTimeSeconds: certificateRequestExpiryTimeSeconds, + certificateRequestRenewalTimeSeconds: certificateRequestRenewalTimeSeconds, + certificateRequestReadyStatus: certificateRequestReadyStatus, + driverIssueCallCount: driverIssueCallCount, + driverIssueErrorCount: driverIssueErrorCount, + managedVolumeCount: managedVolumeCount, + managedCertificateCount: managedCertificateCount, + } + + return m +} + +// NewServer registers Prometheus metrics and returns a new Prometheus metrics HTTP server. +func (m *Metrics) NewServer(ln net.Listener) *http.Server { + m.registry.MustRegister(m.certificateRequestExpiryTimeSeconds) + m.registry.MustRegister(m.certificateRequestRenewalTimeSeconds) + m.registry.MustRegister(m.certificateRequestReadyStatus) + m.registry.MustRegister(m.driverIssueCallCount) + m.registry.MustRegister(m.driverIssueErrorCount) + m.registry.MustRegister(m.managedVolumeCount) + m.registry.MustRegister(m.managedCertificateCount) + + mux := http.NewServeMux() + mux.Handle("/metrics", promhttp.HandlerFor(m.registry, promhttp.HandlerOpts{})) + + server := &http.Server{ + Addr: ln.Addr().String(), + ReadTimeout: prometheusMetricsServerReadTimeout, + WriteTimeout: prometheusMetricsServerWriteTimeout, + MaxHeaderBytes: prometheusMetricsServerMaxHeaderBytes, + Handler: mux, + } + + return server +} + +// IncrementIssueCallCount will increase the issue call counter for the driver. +func (m *Metrics) IncrementIssueCallCount(nodeNameHash, volumeID string) { + m.driverIssueCallCount.WithLabelValues(nodeNameHash, volumeID).Inc() +} + +// IncrementIssueErrorCount will increase count of errors during issue call of the driver. 
+func (m *Metrics) IncrementIssueErrorCount(nodeNameHash, volumeID string) { + m.driverIssueErrorCount.WithLabelValues(nodeNameHash, volumeID).Inc() +} + +// IncrementManagedVolumeCount will increase the managed volume counter for the driver. +func (m *Metrics) IncrementManagedVolumeCount(nodeNameHash string) { + m.managedVolumeCount.WithLabelValues(nodeNameHash).Inc() +} + +// IncrementManagedCertificateCount will increase the managed certificate count for the driver. +func (m *Metrics) IncrementManagedCertificateCount(nodeNameHash string) { + m.managedCertificateCount.WithLabelValues(nodeNameHash).Inc() +} diff --git a/test/driver/driver_testing.go b/test/driver/driver_testing.go index 165a204..ddc1fc3 100644 --- a/test/driver/driver_testing.go +++ b/test/driver/driver_testing.go @@ -36,6 +36,7 @@ import ( "github.com/cert-manager/csi-lib/driver" "github.com/cert-manager/csi-lib/manager" "github.com/cert-manager/csi-lib/metadata" + "github.com/cert-manager/csi-lib/metrics" "github.com/cert-manager/csi-lib/storage" ) @@ -45,6 +46,7 @@ type Options struct { Log *logr.Logger Client cmclient.Interface Mounter mount.Interface + Metrics *metrics.Metrics NodeID string MaxRequestsPerVolume int @@ -110,6 +112,7 @@ func Run(t *testing.T, opts Options) (Options, csi.NodeClient, func()) { Clock: opts.Clock, Log: opts.Log, NodeID: opts.NodeID, + Metrics: opts.Metrics, MaxRequestsPerVolume: opts.MaxRequestsPerVolume, GeneratePrivateKey: opts.GeneratePrivateKey, GenerateRequest: opts.GenerateRequest, diff --git a/test/integration/metrics_test.go b/test/integration/metrics_test.go new file mode 100644 index 0000000..34a3923 --- /dev/null +++ b/test/integration/metrics_test.go @@ -0,0 +1,235 @@ +package integration + +import ( + "context" + "crypto" + "crypto/x509" + "fmt" + "io" + "net" + "net/http" + "os" + "strings" + "testing" + "time" + + "github.com/container-storage-interface/spec/lib/go/csi" + "github.com/go-logr/logr/testr" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 
+ "k8s.io/apimachinery/pkg/util/wait" + fakeclock "k8s.io/utils/clock/testing" + + cmapi "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1" + cmmeta "github.com/cert-manager/cert-manager/pkg/apis/meta/v1" + testcrypto "github.com/cert-manager/cert-manager/test/unit/crypto" + "github.com/cert-manager/csi-lib/manager" + "github.com/cert-manager/csi-lib/metadata" + "github.com/cert-manager/csi-lib/metrics" + "github.com/cert-manager/csi-lib/storage" + testdriver "github.com/cert-manager/csi-lib/test/driver" + testutil "github.com/cert-manager/csi-lib/test/util" +) + +var ( + testMetrics = func(ctx context.Context, metricsEndpoint, expectedOutput string) error { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, metricsEndpoint, nil) + if err != nil { + return err + } + resp, err := http.DefaultClient.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + + output, err := io.ReadAll(resp.Body) + if err != nil { + return err + } + + trimmedOutput := strings.SplitN(string(output), "# HELP go_gc_duration_seconds", 2)[0] + if strings.TrimSpace(trimmedOutput) != strings.TrimSpace(expectedOutput) { + return fmt.Errorf("got unexpected metrics output\nexp:\n%s\ngot:\n%s\n", + expectedOutput, trimmedOutput) + } + + return nil + } + + waitForMetrics = func(t *testing.T, ctx context.Context, metricsEndpoint, expectedOutput string) { + var lastErr error + err := wait.PollUntilContextCancel(ctx, time.Millisecond*100, true, func(ctx context.Context) (done bool, err error) { + if err := testMetrics(ctx, metricsEndpoint, expectedOutput); err != nil { + lastErr = err + return false, nil + } + + return true, nil + }) + if err != nil { + t.Fatalf("%s: failed to wait for expected metrics to be exposed: %s", err, lastErr) + } + } +) + +func TestMetricsServer(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + + testLog := testr.New(t) + testNamespace := "test-ns" + + // Build metrics handler, and 
start metrics server with a random available port + metricsHandler := metrics.New(&testLog) + metricsLn, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Fatal(err) + } + metricsServer := metricsHandler.NewServer(metricsLn) + errCh := make(chan error) + go func() { + defer close(errCh) + testLog.Info("starting metrics server", "address", metricsLn.Addr()) + if err := metricsServer.Serve(metricsLn); err != http.ErrServerClosed { + errCh <- err + } + }() + defer func() { + // allow a timeout for graceful shutdown + shutdownCtx, cancel := context.WithTimeout(ctx, time.Second*5) + defer cancel() + + if err := metricsServer.Shutdown(shutdownCtx); err != nil { + t.Fatal(err) + } + err := <-errCh + if err != nil { + t.Fatal(err) + } + }() + + // Build and start the driver + store := storage.NewMemoryFS() + clock := fakeclock.NewFakeClock(time.Now()) + opts, cl, stop := testdriver.Run(t, testdriver.Options{ + Store: store, + Clock: clock, + Metrics: metricsHandler, + Log: &testLog, + GeneratePrivateKey: func(meta metadata.Metadata) (crypto.PrivateKey, error) { + return nil, nil + }, + GenerateRequest: func(meta metadata.Metadata) (*manager.CertificateRequestBundle, error) { + return &manager.CertificateRequestBundle{ + Namespace: testNamespace, + IssuerRef: cmmeta.ObjectReference{ + Name: "test-issuer", + Kind: "test-issuer-kind", + Group: "test-issuer-group", + }, + }, nil + }, + SignRequest: func(meta metadata.Metadata, key crypto.PrivateKey, request *x509.CertificateRequest) (csr []byte, err error) { + return []byte{}, nil + }, + WriteKeypair: func(meta metadata.Metadata, key crypto.PrivateKey, chain []byte, ca []byte) error { + store.WriteFiles(meta, map[string][]byte{ + "ca": ca, + "cert": chain, + }) + nextIssuanceTime := clock.Now().Add(time.Hour) + meta.NextIssuanceTime = &nextIssuanceTime + return store.WriteMetadata(meta.VolumeID, meta) + }, + }) + defer stop() + + // Should expose no additional metrics + metricsEndpoint := 
fmt.Sprintf("http://%s/metrics", metricsServer.Addr) + waitForMetrics(t, ctx, metricsEndpoint, "") + + // Create a self-signed Certificate and wait for it to be issued + privKey := testcrypto.MustCreatePEMPrivateKey(t) + certTemplate := &cmapi.Certificate{ + ObjectMeta: metav1.ObjectMeta{Namespace: testNamespace, Name: "test"}, + Spec: cmapi.CertificateSpec{ + CommonName: "test.example.com", + }, + } + notBefore, notAfter := time.Unix(0, 0), time.Unix(300, 0) // renewal time will be 200 + selfSignedCertBytesWithValidity := testcrypto.MustCreateCertWithNotBeforeAfter(t, privKey, certTemplate, notBefore, notAfter) + go testutil.IssueOneRequest(ctx, t, opts.Client, testNamespace, selfSignedCertBytesWithValidity, []byte("ca bytes")) + + // Spin up a test pod + tmpDir, err := os.MkdirTemp("", "*") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(tmpDir) + _, err = cl.NodePublishVolume(ctx, &csi.NodePublishVolumeRequest{ + VolumeId: "test-vol", + VolumeContext: map[string]string{ + "csi.storage.k8s.io/ephemeral": "true", + "csi.storage.k8s.io/pod.name": "the-pod-name", + "csi.storage.k8s.io/pod.namespace": testNamespace, + }, + TargetPath: tmpDir, + Readonly: true, + }) + if err != nil { + t.Fatal(err) + } + + // Get the CSR name + req, err := testutil.WaitAndGetOneCertificateRequestInNamespace(ctx, opts.Client, testNamespace) + if err != nil { + t.Fatal(err) + } + + // Should expose that CertificateRequest as ready with expiry and renewal time + // node="f56fd9f8b" is the hash value of "test-node" defined in driver_testing.go + expectedOutputTemplate := `# HELP certmanager_csi_certificate_request_expiration_timestamp_seconds The date after which the certificate request expires. Expressed as a Unix Epoch Time. 
+# TYPE certmanager_csi_certificate_request_expiration_timestamp_seconds gauge +certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-cr-name",namespace="test-ns"} 300 +# HELP certmanager_csi_certificate_request_ready_status The ready status of the certificate request. +# TYPE certmanager_csi_certificate_request_ready_status gauge +certmanager_csi_certificate_request_ready_status{condition="False",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-cr-name",namespace="test-ns"} 0 +certmanager_csi_certificate_request_ready_status{condition="True",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-cr-name",namespace="test-ns"} 1 +certmanager_csi_certificate_request_ready_status{condition="Unknown",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-cr-name",namespace="test-ns"} 0 +# HELP certmanager_csi_certificate_request_renewal_timestamp_seconds The number of seconds before expiration time the certificate request should renew. +# TYPE certmanager_csi_certificate_request_renewal_timestamp_seconds gauge +certmanager_csi_certificate_request_renewal_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-cr-name",namespace="test-ns"} 200 +# HELP certmanager_csi_driver_issue_call_count The number of issue() calls made by the driver. +# TYPE certmanager_csi_driver_issue_call_count counter +certmanager_csi_driver_issue_call_count{node="f56fd9f8b",volume="test-vol"} 1 +# HELP certmanager_csi_managed_certificate_count The number of certificates managed by the csi driver. 
+# TYPE certmanager_csi_managed_certificate_count counter +certmanager_csi_managed_certificate_count{node="f56fd9f8b"} 1 +# HELP certmanager_csi_managed_volume_count The number of volume managed by the csi driver. +# TYPE certmanager_csi_managed_volume_count counter +certmanager_csi_managed_volume_count{node="f56fd9f8b"} 1 +` + waitForMetrics(t, ctx, metricsEndpoint, strings.ReplaceAll(expectedOutputTemplate, "test-cr-name", req.Name)) + + // Delete the test pod + _, err = cl.NodeUnpublishVolume(ctx, &csi.NodeUnpublishVolumeRequest{ + VolumeId: "test-vol", + TargetPath: tmpDir, + }) + if err != nil { + t.Fatal(err) + } + + // Should expose no CertificateRequest and only metrics counters + waitForMetrics(t, ctx, metricsEndpoint, `# HELP certmanager_csi_driver_issue_call_count The number of issue() calls made by the driver. +# TYPE certmanager_csi_driver_issue_call_count counter +certmanager_csi_driver_issue_call_count{node="f56fd9f8b",volume="test-vol"} 1 +# HELP certmanager_csi_managed_certificate_count The number of certificates managed by the csi driver. +# TYPE certmanager_csi_managed_certificate_count counter +certmanager_csi_managed_certificate_count{node="f56fd9f8b"} 1 +# HELP certmanager_csi_managed_volume_count The number of volume managed by the csi driver. 
+# TYPE certmanager_csi_managed_volume_count counter +certmanager_csi_managed_volume_count{node="f56fd9f8b"} 1 +`) + +} diff --git a/test/util/testutil.go b/test/util/testutil.go index d405e4a..6d9dd95 100644 --- a/test/util/testutil.go +++ b/test/util/testutil.go @@ -29,7 +29,7 @@ import ( "k8s.io/apimachinery/pkg/util/wait" ) -func waitAndGetOneCertificateRequestInNamespace(ctx context.Context, client cmclient.Interface, ns string) (*cmapi.CertificateRequest, error) { +func WaitAndGetOneCertificateRequestInNamespace(ctx context.Context, client cmclient.Interface, ns string) (*cmapi.CertificateRequest, error) { var req *cmapi.CertificateRequest if err := wait.PollUntilContextCancel(ctx, time.Millisecond*50, true, func(ctx context.Context) (done bool, err error) { reqs, err := client.CertmanagerV1().CertificateRequests(ns).List(ctx, metav1.ListOptions{}) @@ -53,7 +53,7 @@ func waitAndGetOneCertificateRequestInNamespace(ctx context.Context, client cmcl func IssueOneRequest(ctx context.Context, t *testing.T, client cmclient.Interface, namespace string, cert, ca []byte) { if err := func() error { - req, err := waitAndGetOneCertificateRequestInNamespace(ctx, client, namespace) + req, err := WaitAndGetOneCertificateRequestInNamespace(ctx, client, namespace) if err != nil { return err } @@ -80,7 +80,7 @@ func IssueOneRequest(ctx context.Context, t *testing.T, client cmclient.Interfac func SetCertificateRequestConditions(ctx context.Context, t *testing.T, client cmclient.Interface, namespace string, conditions ...cmapi.CertificateRequestCondition) { if err := func() error { - req, err := waitAndGetOneCertificateRequestInNamespace(ctx, client, namespace) + req, err := WaitAndGetOneCertificateRequestInNamespace(ctx, client, namespace) if err != nil { return err } From 8ed780cbe3b174956e33a954d450178f08b47e07 Mon Sep 17 00:00:00 2001 From: Jing Liu Date: Tue, 4 Feb 2025 16:23:02 -0800 Subject: [PATCH 2/7] Resolve comments for metrics package fixes: #60 Signed-off-by: Jing 
Liu --- .DS_Store | Bin 0 -> 8196 bytes manager/manager.go | 3 ++- metrics/certificaterequest_test.go | 5 ++-- metrics/metrics.go | 38 +++++++---------------------- test/integration/metrics_test.go | 12 +++++++-- 5 files changed, 24 insertions(+), 34 deletions(-) create mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..fcabe824e9c646fabb048f1fb5a47f2ba38c8687 GIT binary patch literal 8196 zcmeHMziSjh6n+!W+(8f`DH5?+5Cb-9#6qw+!`TUf30SDnO)ezSJ2$@+jSx-|6toC} zjg_U9*l8Is_WlE+;9n37*jV_znPKm}{c)BNZU%PWvhSPuzWHWuCbJoaZZ72K<||hz9kkwJY7HLYRvt;|XO+q+YnzXmLST30$ylrP|A?W-7p`xt>$%{~1S+$t~mKVI5tuDJ2f zV7mP;!)P5H&`=fp4=AQKhNVUGTFLlE9wD_Hy1aMe+7atTrXA@>IZEp}&3Y=d2sLwb zM=Lq=L*}H0ubb1|@$hvedr@BVZZU5SV{w<(c{C7XYH9w#b`eJE*m#=i$h9T)^ulRf z5$mdAWMZpmAvaUZ5Ua`{q^9%dwthczb0vrB&X#hN(&KjX8uZM=rx^1$=BRzD9)+SD zMryk9Uig%0OZwCcr*yeLTB@`LH3>#9#(J8W$1{=jNg<@Bz0awpOgmChIZErX>s!Un zB`|7#p15_L=NWQR!=K%BT}$gq_M*Jz{eVu=5O^0cmsc@9H}RW}@wA>@Vc(6HBc-P8 zZ>cXO?Mj9JMi=O1&HY#oxK5Ap`Tt`1_y2yhCiFQt&^Hd4N@J=qiSGKdb*|%l){b!9 z!o`Jg9T#mC-0X22(H_SU(|;J^IRcdH)P3x@h&L$v@FAeqYhRbUNiUqr_BD9ziJ;Q>i_@% literal 0 HcmV?d00001 diff --git a/manager/manager.go b/manager/manager.go index c8c583e..a1e29d3 100644 --- a/manager/manager.go +++ b/manager/manager.go @@ -34,6 +34,7 @@ import ( cminformers "github.com/cert-manager/cert-manager/pkg/client/informers/externalversions" cmlisters "github.com/cert-manager/cert-manager/pkg/client/listers/certmanager/v1" "github.com/go-logr/logr" + "github.com/prometheus/client_golang/prometheus" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" @@ -131,7 +132,7 @@ func NewManager(opts Options) (*Manager, error) { return nil, errors.New("log must be set") } if opts.Metrics == nil { - opts.Metrics = metrics.New(opts.Log) + opts.Metrics = metrics.New(opts.Log, prometheus.NewRegistry()) } if opts.MetadataReader == nil { return nil, 
errors.New("MetadataReader must be set") diff --git a/metrics/certificaterequest_test.go b/metrics/certificaterequest_test.go index 201b461..1ec7f5b 100644 --- a/metrics/certificaterequest_test.go +++ b/metrics/certificaterequest_test.go @@ -22,6 +22,7 @@ import ( "time" "github.com/go-logr/logr/testr" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/testutil" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -188,7 +189,7 @@ func TestCertificateRequestMetrics(t *testing.T) { for n, test := range tests { t.Run(n, func(t *testing.T) { testLog := testr.New(t) - m := New(&testLog) + m := New(&testLog, prometheus.NewRegistry()) m.UpdateCertificateRequest(test.cr, test.notAfter, test.renewBefore) if err := testutil.CollectAndCompare(m.certificateRequestExpiryTimeSeconds, @@ -217,7 +218,7 @@ func TestCertificateRequestMetrics(t *testing.T) { func TestCertificateRequestCache(t *testing.T) { testLog := testr.New(t) - m := New(&testLog) + m := New(&testLog, prometheus.NewRegistry()) // private key to be used to generate X509 certificate privKey := testcrypto.MustCreatePEMPrivateKey(t) diff --git a/metrics/metrics.go b/metrics/metrics.go index e2f0b06..f686ebe 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -17,23 +17,17 @@ limitations under the License. 
package metrics import ( - "net" "net/http" - "time" "github.com/go-logr/logr" "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/collectors" "github.com/prometheus/client_golang/prometheus/promhttp" ) const ( // Namespace is the namespace for csi-lib metric names - namespace = "certmanager" - subsystem = "csi" - prometheusMetricsServerReadTimeout = 8 * time.Second - prometheusMetricsServerWriteTimeout = 8 * time.Second - prometheusMetricsServerMaxHeaderBytes = 1 << 20 // 1 MiB + namespace = "certmanager" + subsystem = "csi" ) // Metrics is designed to be a shared object for updating the metrics exposed by csi-lib @@ -51,7 +45,7 @@ type Metrics struct { } // New creates a Metrics struct and populates it with prometheus metric types. -func New(logger *logr.Logger) *Metrics { +func New(logger *logr.Logger, registry *prometheus.Registry) *Metrics { var ( certificateRequestExpiryTimeSeconds = prometheus.NewGaugeVec( prometheus.GaugeOpts{ @@ -124,12 +118,6 @@ func New(logger *logr.Logger) *Metrics { ) ) - // Create Registry and register the recommended collectors - registry := prometheus.NewRegistry() - registry.MustRegister( - collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}), - collectors.NewGoCollector(), - ) // Create server and register Prometheus metrics handler m := &Metrics{ log: logger.WithName("metrics"), @@ -144,11 +132,6 @@ func New(logger *logr.Logger) *Metrics { managedCertificateCount: managedCertificateCount, } - return m -} - -// NewServer registers Prometheus metrics and returns a new Prometheus metrics HTTP server. 
-func (m *Metrics) NewServer(ln net.Listener) *http.Server { m.registry.MustRegister(m.certificateRequestExpiryTimeSeconds) m.registry.MustRegister(m.certificateRequestRenewalTimeSeconds) m.registry.MustRegister(m.certificateRequestReadyStatus) @@ -157,18 +140,15 @@ func (m *Metrics) NewServer(ln net.Listener) *http.Server { m.registry.MustRegister(m.managedVolumeCount) m.registry.MustRegister(m.managedCertificateCount) + return m +} + +// DefaultHandler returns a default prometheus metrics HTTP handler +func (m *Metrics) DefaultHandler() http.Handler { mux := http.NewServeMux() mux.Handle("/metrics", promhttp.HandlerFor(m.registry, promhttp.HandlerOpts{})) - server := &http.Server{ - Addr: ln.Addr().String(), - ReadTimeout: prometheusMetricsServerReadTimeout, - WriteTimeout: prometheusMetricsServerWriteTimeout, - MaxHeaderBytes: prometheusMetricsServerMaxHeaderBytes, - Handler: mux, - } - - return server + return mux } // IncrementIssueCallCount will increase the issue call counter for the driver. 
diff --git a/test/integration/metrics_test.go b/test/integration/metrics_test.go index 34a3923..f578fc2 100644 --- a/test/integration/metrics_test.go +++ b/test/integration/metrics_test.go @@ -15,6 +15,7 @@ import ( "github.com/container-storage-interface/spec/lib/go/csi" "github.com/go-logr/logr/testr" + "github.com/prometheus/client_golang/prometheus" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" fakeclock "k8s.io/utils/clock/testing" @@ -80,12 +81,19 @@ func TestMetricsServer(t *testing.T) { testNamespace := "test-ns" // Build metrics handler, and start metrics server with a random available port - metricsHandler := metrics.New(&testLog) + metricsHandler := metrics.New(&testLog, prometheus.NewRegistry()) metricsLn, err := net.Listen("tcp", "127.0.0.1:0") if err != nil { t.Fatal(err) } - metricsServer := metricsHandler.NewServer(metricsLn) + metricsServer := &http.Server{ + Addr: metricsLn.Addr().String(), + ReadTimeout: 8 * time.Second, + WriteTimeout: 8 * time.Second, + MaxHeaderBytes: 1 << 20, // 1 MiB + Handler: metricsHandler.DefaultHandler(), + } + errCh := make(chan error) go func() { defer close(errCh) From 2044503c91db06f10f2b1b310d96708f31b77b4f Mon Sep 17 00:00:00 2001 From: Jing Liu Date: Fri, 29 Aug 2025 11:12:54 -0700 Subject: [PATCH 3/7] fix promlinter and boilerplate issues Signed-off-by: Jing Liu --- .DS_Store | Bin 8196 -> 0 bytes manager/manager.go | 10 ++--- metrics/metrics.go | 64 +++++++++++++++---------------- test/integration/metrics_test.go | 52 ++++++++++++++++--------- 4 files changed, 71 insertions(+), 55 deletions(-) delete mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index fcabe824e9c646fabb048f1fb5a47f2ba38c8687..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8196 zcmeHMziSjh6n+!W+(8f`DH5?+5Cb-9#6qw+!`TUf30SDnO)ezSJ2$@+jSx-|6toC} 
zjg_U9*l8Is_WlE+;9n37*jV_znPKm}{c)BNZU%PWvhSPuzWHWuCbJoaZZ72K<||hz9kkwJY7HLYRvt;|XO+q+YnzXmLST30$ylrP|A?W-7p`xt>$%{~1S+$t~mKVI5tuDJ2f zV7mP;!)P5H&`=fp4=AQKhNVUGTFLlE9wD_Hy1aMe+7atTrXA@>IZEp}&3Y=d2sLwb zM=Lq=L*}H0ubb1|@$hvedr@BVZZU5SV{w<(c{C7XYH9w#b`eJE*m#=i$h9T)^ulRf z5$mdAWMZpmAvaUZ5Ua`{q^9%dwthczb0vrB&X#hN(&KjX8uZM=rx^1$=BRzD9)+SD zMryk9Uig%0OZwCcr*yeLTB@`LH3>#9#(J8W$1{=jNg<@Bz0awpOgmChIZErX>s!Un zB`|7#p15_L=NWQR!=K%BT}$gq_M*Jz{eVu=5O^0cmsc@9H}RW}@wA>@Vc(6HBc-P8 zZ>cXO?Mj9JMi=O1&HY#oxK5Ap`Tt`1_y2yhCiFQt&^Hd4N@J=qiSGKdb*|%l){b!9 z!o`Jg9T#mC-0X22(H_SU(|;J^IRcdH)P3x@h&L$v@FAeqYhRbUNiUqr_BD9ziJ;Q>i_@% diff --git a/manager/manager.go b/manager/manager.go index a1e29d3..77beb9a 100644 --- a/manager/manager.go +++ b/manager/manager.go @@ -400,7 +400,7 @@ func (m *Manager) issue(ctx context.Context, volumeID string) error { log.Info("Processing issuance") // Increase issue count - m.metrics.IncrementIssueCallCount(m.nodeNameHash, volumeID) + m.metrics.IncrementIssueCallCountTotal(m.nodeNameHash, volumeID) if err := m.cleanupStaleRequests(ctx, log, volumeID); err != nil { return fmt.Errorf("cleaning up stale requests: %w", err) @@ -779,7 +779,7 @@ func (m *Manager) ManageVolumeImmediate(ctx context.Context, volumeID string) (m // how to proceed depending on the context this method was called within. 
if err := m.issue(ctx, volumeID); err != nil { // Increase issue error count - m.metrics.IncrementIssueErrorCount(m.nodeNameHash, volumeID) + m.metrics.IncrementIssueErrorCountTotal(m.nodeNameHash, volumeID) return true, err } } @@ -808,7 +808,7 @@ func (m *Manager) manageVolumeIfNotManaged(volumeID string) (managed bool) { stopCh := make(chan struct{}) m.managedVolumes[volumeID] = stopCh // Increase managed volume count for this driver - m.metrics.IncrementManagedVolumeCount(m.nodeNameHash) + m.metrics.IncrementManagedVolumeCountTotal(m.nodeNameHash) return true } @@ -828,7 +828,7 @@ func (m *Manager) startRenewalRoutine(volumeID string) (started bool) { // Increase managed certificate count for this driver. // We assume each volume will have one certificate to be managed. - m.metrics.IncrementManagedCertificateCount(m.nodeNameHash) + m.metrics.IncrementManagedCertificateCountTotal(m.nodeNameHash) // Create a context that will be cancelled when the stopCh is closed ctx, cancel := context.WithCancel(context.Background()) @@ -866,7 +866,7 @@ func (m *Manager) startRenewalRoutine(volumeID string) (started bool) { if err := m.issue(issueCtx, volumeID); err != nil { log.Error(err, "Failed to issue certificate, retrying after applying exponential backoff") // Increase issue error count - m.metrics.IncrementIssueErrorCount(m.nodeNameHash, volumeID) + m.metrics.IncrementIssueErrorCountTotal(m.nodeNameHash, volumeID) return false, nil } return true, nil diff --git a/metrics/metrics.go b/metrics/metrics.go index f686ebe..47bbc2f 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -38,10 +38,10 @@ type Metrics struct { certificateRequestExpiryTimeSeconds *prometheus.GaugeVec certificateRequestRenewalTimeSeconds *prometheus.GaugeVec certificateRequestReadyStatus *prometheus.GaugeVec - driverIssueCallCount *prometheus.CounterVec - driverIssueErrorCount *prometheus.CounterVec - managedVolumeCount *prometheus.CounterVec - managedCertificateCount *prometheus.CounterVec + 
driverIssueCallCountTotal *prometheus.CounterVec + driverIssueErrorCountTotal *prometheus.CounterVec + managedVolumeCountTotal *prometheus.CounterVec + managedCertificateCountTotal *prometheus.CounterVec } // New creates a Metrics struct and populates it with prometheus metric types. @@ -77,41 +77,41 @@ func New(logger *logr.Logger, registry *prometheus.Registry) *Metrics { []string{"name", "namespace", "condition", "issuer_name", "issuer_kind", "issuer_group"}, ) - driverIssueCallCount = prometheus.NewCounterVec( + driverIssueCallCountTotal = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: namespace, Subsystem: subsystem, - Name: "driver_issue_call_count", + Name: "driver_issue_call_count_total", Help: "The number of issue() calls made by the driver.", }, []string{"node", "volume"}, ) - driverIssueErrorCount = prometheus.NewCounterVec( + driverIssueErrorCountTotal = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: namespace, Subsystem: subsystem, - Name: "driver_issue_error_count", + Name: "driver_issue_error_count_total", Help: "The number of errors encountered during the driver issue() calls.", }, []string{"node", "volume"}, ) - managedVolumeCount = prometheus.NewCounterVec( + managedVolumeCountTotal = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: namespace, Subsystem: subsystem, - Name: "managed_volume_count", + Name: "managed_volume_count_total", Help: "The number of volume managed by the csi driver.", }, []string{"node"}, ) - managedCertificateCount = prometheus.NewCounterVec( + managedCertificateCountTotal = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: namespace, Subsystem: subsystem, - Name: "managed_certificate_count", + Name: "managed_certificate_count_total", Help: "The number of certificates managed by the csi driver.", }, []string{"node"}, @@ -126,19 +126,19 @@ func New(logger *logr.Logger, registry *prometheus.Registry) *Metrics { certificateRequestExpiryTimeSeconds: 
certificateRequestExpiryTimeSeconds, certificateRequestRenewalTimeSeconds: certificateRequestRenewalTimeSeconds, certificateRequestReadyStatus: certificateRequestReadyStatus, - driverIssueCallCount: driverIssueCallCount, - driverIssueErrorCount: driverIssueErrorCount, - managedVolumeCount: managedVolumeCount, - managedCertificateCount: managedCertificateCount, + driverIssueCallCountTotal: driverIssueCallCountTotal, + driverIssueErrorCountTotal: driverIssueErrorCountTotal, + managedVolumeCountTotal: managedVolumeCountTotal, + managedCertificateCountTotal: managedCertificateCountTotal, } m.registry.MustRegister(m.certificateRequestExpiryTimeSeconds) m.registry.MustRegister(m.certificateRequestRenewalTimeSeconds) m.registry.MustRegister(m.certificateRequestReadyStatus) - m.registry.MustRegister(m.driverIssueCallCount) - m.registry.MustRegister(m.driverIssueErrorCount) - m.registry.MustRegister(m.managedVolumeCount) - m.registry.MustRegister(m.managedCertificateCount) + m.registry.MustRegister(m.driverIssueCallCountTotal) + m.registry.MustRegister(m.driverIssueErrorCountTotal) + m.registry.MustRegister(m.managedVolumeCountTotal) + m.registry.MustRegister(m.managedCertificateCountTotal) return m } @@ -151,22 +151,22 @@ func (m *Metrics) DefaultHandler() http.Handler { return mux } -// IncrementIssueCallCount will increase the issue call counter for the driver. -func (m *Metrics) IncrementIssueCallCount(nodeNameHash, volumeID string) { - m.driverIssueCallCount.WithLabelValues(nodeNameHash, volumeID).Inc() +// IncrementIssueCallCountTotal will increase the issue call counter for the driver. +func (m *Metrics) IncrementIssueCallCountTotal(nodeNameHash, volumeID string) { + m.driverIssueCallCountTotal.WithLabelValues(nodeNameHash, volumeID).Inc() } -// IncrementIssueErrorCount will increase count of errors during issue call of the driver. 
-func (m *Metrics) IncrementIssueErrorCount(nodeNameHash, volumeID string) { - m.driverIssueErrorCount.WithLabelValues(nodeNameHash, volumeID).Inc() +// IncrementIssueErrorCountTotal will increase count of errors during issue call of the driver. +func (m *Metrics) IncrementIssueErrorCountTotal(nodeNameHash, volumeID string) { + m.driverIssueErrorCountTotal.WithLabelValues(nodeNameHash, volumeID).Inc() } -// IncrementManagedVolumeCount will increase the managed volume counter for the driver. -func (m *Metrics) IncrementManagedVolumeCount(nodeNameHash string) { - m.managedVolumeCount.WithLabelValues(nodeNameHash).Inc() +// IncrementManagedVolumeCountTotal will increase the managed volume counter for the driver. +func (m *Metrics) IncrementManagedVolumeCountTotal(nodeNameHash string) { + m.managedVolumeCountTotal.WithLabelValues(nodeNameHash).Inc() } -// IncrementManagedCertificateCount will increase the managed certificate count for the driver. -func (m *Metrics) IncrementManagedCertificateCount(nodeNameHash string) { - m.managedCertificateCount.WithLabelValues(nodeNameHash).Inc() +// IncrementManagedCertificateCountTotal will increase the managed certificate count for the driver. +func (m *Metrics) IncrementManagedCertificateCountTotal(nodeNameHash string) { + m.managedCertificateCountTotal.WithLabelValues(nodeNameHash).Inc() } diff --git a/test/integration/metrics_test.go b/test/integration/metrics_test.go index f578fc2..0facffe 100644 --- a/test/integration/metrics_test.go +++ b/test/integration/metrics_test.go @@ -1,3 +1,19 @@ +/* +Copyright 2025 The cert-manager Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + package integration import ( @@ -207,15 +223,15 @@ certmanager_csi_certificate_request_ready_status{condition="Unknown",issuer_grou # HELP certmanager_csi_certificate_request_renewal_timestamp_seconds The number of seconds before expiration time the certificate request should renew. # TYPE certmanager_csi_certificate_request_renewal_timestamp_seconds gauge certmanager_csi_certificate_request_renewal_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-cr-name",namespace="test-ns"} 200 -# HELP certmanager_csi_driver_issue_call_count The number of issue() calls made by the driver. -# TYPE certmanager_csi_driver_issue_call_count counter -certmanager_csi_driver_issue_call_count{node="f56fd9f8b",volume="test-vol"} 1 -# HELP certmanager_csi_managed_certificate_count The number of certificates managed by the csi driver. -# TYPE certmanager_csi_managed_certificate_count counter -certmanager_csi_managed_certificate_count{node="f56fd9f8b"} 1 -# HELP certmanager_csi_managed_volume_count The number of volume managed by the csi driver. -# TYPE certmanager_csi_managed_volume_count counter -certmanager_csi_managed_volume_count{node="f56fd9f8b"} 1 +# HELP certmanager_csi_driver_issue_call_count_total The number of issue() calls made by the driver. +# TYPE certmanager_csi_driver_issue_call_count_total counter +certmanager_csi_driver_issue_call_count_total{node="f56fd9f8b",volume="test-vol"} 1 +# HELP certmanager_csi_managed_certificate_count_total The number of certificates managed by the csi driver. 
+# TYPE certmanager_csi_managed_certificate_count_total counter +certmanager_csi_managed_certificate_count_total{node="f56fd9f8b"} 1 +# HELP certmanager_csi_managed_volume_count_total The number of volume managed by the csi driver. +# TYPE certmanager_csi_managed_volume_count_total counter +certmanager_csi_managed_volume_count_total{node="f56fd9f8b"} 1 ` waitForMetrics(t, ctx, metricsEndpoint, strings.ReplaceAll(expectedOutputTemplate, "test-cr-name", req.Name)) @@ -229,15 +245,15 @@ certmanager_csi_managed_volume_count{node="f56fd9f8b"} 1 } // Should expose no CertificateRequest and only metrics counters - waitForMetrics(t, ctx, metricsEndpoint, `# HELP certmanager_csi_driver_issue_call_count The number of issue() calls made by the driver. -# TYPE certmanager_csi_driver_issue_call_count counter -certmanager_csi_driver_issue_call_count{node="f56fd9f8b",volume="test-vol"} 1 -# HELP certmanager_csi_managed_certificate_count The number of certificates managed by the csi driver. -# TYPE certmanager_csi_managed_certificate_count counter -certmanager_csi_managed_certificate_count{node="f56fd9f8b"} 1 -# HELP certmanager_csi_managed_volume_count The number of volume managed by the csi driver. -# TYPE certmanager_csi_managed_volume_count counter -certmanager_csi_managed_volume_count{node="f56fd9f8b"} 1 + waitForMetrics(t, ctx, metricsEndpoint, `# HELP certmanager_csi_driver_issue_call_count_total The number of issue() calls made by the driver. +# TYPE certmanager_csi_driver_issue_call_count_total counter +certmanager_csi_driver_issue_call_count_total{node="f56fd9f8b",volume="test-vol"} 1 +# HELP certmanager_csi_managed_certificate_count_total The number of certificates managed by the csi driver. +# TYPE certmanager_csi_managed_certificate_count_total counter +certmanager_csi_managed_certificate_count_total{node="f56fd9f8b"} 1 +# HELP certmanager_csi_managed_volume_count_total The number of volume managed by the csi driver. 
+# TYPE certmanager_csi_managed_volume_count_total counter +certmanager_csi_managed_volume_count_total{node="f56fd9f8b"} 1 `) } From 0b19258cbc8407f157adcc913da27317d6b08a5a Mon Sep 17 00:00:00 2001 From: Jing Liu Date: Fri, 29 Aug 2025 11:27:50 -0700 Subject: [PATCH 4/7] fix minor golangci-lint issue Signed-off-by: Jing Liu --- test/integration/metrics_test.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/integration/metrics_test.go b/test/integration/metrics_test.go index 0facffe..fc6c9f2 100644 --- a/test/integration/metrics_test.go +++ b/test/integration/metrics_test.go @@ -98,7 +98,9 @@ func TestMetricsServer(t *testing.T) { // Build metrics handler, and start metrics server with a random available port metricsHandler := metrics.New(&testLog, prometheus.NewRegistry()) - metricsLn, err := net.Listen("tcp", "127.0.0.1:0") + // listenConfig + listenConfig := &net.ListenConfig{} + metricsLn, err := listenConfig.Listen(ctx, "tcp", "127.0.0.1:0") if err != nil { t.Fatal(err) } From d1a0f3ef97fd729029ddf2c021aacfe8ecbf6339 Mon Sep 17 00:00:00 2001 From: Jing Liu Date: Fri, 12 Sep 2025 11:36:22 -0700 Subject: [PATCH 5/7] Use prometheus collector pattern for metrics Signed-off-by: Jing Liu --- manager/manager.go | 51 ++--- manager/manager_test.go | 17 +- metrics/certificaterequest.go | 102 ---------- metrics/certificaterequest_collector.go | 249 ++++++++++++++++++++++++ metrics/certificaterequest_test.go | 167 ++++++++++++---- metrics/metrics.go | 101 ++-------- test/integration/metrics_test.go | 61 ++++-- 7 files changed, 465 insertions(+), 283 deletions(-) delete mode 100644 metrics/certificaterequest.go create mode 100644 metrics/certificaterequest_collector.go diff --git a/manager/manager.go b/manager/manager.go index 77beb9a..5698932 100644 --- a/manager/manager.go +++ b/manager/manager.go @@ -34,7 +34,6 @@ import ( cminformers "github.com/cert-manager/cert-manager/pkg/client/informers/externalversions" cmlisters
"github.com/cert-manager/cert-manager/pkg/client/listers/certmanager/v1" "github.com/go-logr/logr" - "github.com/prometheus/client_golang/prometheus" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" @@ -131,9 +130,6 @@ func NewManager(opts Options) (*Manager, error) { if opts.Log == nil { return nil, errors.New("log must be set") } - if opts.Metrics == nil { - opts.Metrics = metrics.New(opts.Log, prometheus.NewRegistry()) - } if opts.MetadataReader == nil { return nil, errors.New("MetadataReader must be set") } @@ -400,7 +396,9 @@ func (m *Manager) issue(ctx context.Context, volumeID string) error { log.Info("Processing issuance") // Increase issue count - m.metrics.IncrementIssueCallCountTotal(m.nodeNameHash, volumeID) + if m.metrics != nil { + m.metrics.IncrementIssueCallCountTotal(m.nodeNameHash, volumeID) + } if err := m.cleanupStaleRequests(ctx, log, volumeID); err != nil { return fmt.Errorf("cleaning up stale requests: %w", err) @@ -609,7 +607,7 @@ func (m *Manager) handleRequest(ctx context.Context, volumeID string, meta metad // Calculate the default next issuance time. // The implementation's writeKeypair function may override this value before // writing to the storage layer. - expiryPoint, renewalPoint, err := getExpiryAndDefaultNextIssuanceTime(req.Status.Certificate) + renewalPoint, err := calculateNextIssuanceTime(req.Status.Certificate) if err != nil { return fmt.Errorf("calculating next issuance time: %w", err) } @@ -621,10 +619,6 @@ func (m *Manager) handleRequest(ctx context.Context, volumeID string, meta metad } log.V(2).Info("Wrote new keypair to storage") - // Update the request metrics. - // Using meta.NextIssuanceTime instead of renewalPoint here, in case writeKeypair overrides the value. 
- m.metrics.UpdateCertificateRequest(req, expiryPoint, *meta.NextIssuanceTime) - // We must explicitly delete the private key from the pending requests map so that the existing Completed // request will not be re-used upon renewal. // Without this, the renewal would pick up the existing issued certificate and re-issue, rather than requesting @@ -676,9 +670,6 @@ func (m *Manager) cleanupStaleRequests(ctx context.Context, log logr.Logger, vol } } - // Remove the CertificateRequest from the metrics. - m.metrics.RemoveCertificateRequest(toDelete.Name, toDelete.Namespace) - log.Info("Deleted CertificateRequest resource", "name", toDelete.Name, "namespace", toDelete.Namespace) } @@ -779,7 +770,9 @@ func (m *Manager) ManageVolumeImmediate(ctx context.Context, volumeID string) (m // how to proceed depending on the context this method was called within. if err := m.issue(ctx, volumeID); err != nil { // Increase issue error count - m.metrics.IncrementIssueErrorCountTotal(m.nodeNameHash, volumeID) + if m.metrics != nil { + m.metrics.IncrementIssueErrorCountTotal(m.nodeNameHash, volumeID) + } return true, err } } @@ -807,8 +800,6 @@ func (m *Manager) manageVolumeIfNotManaged(volumeID string) (managed bool) { // construct a new channel used to stop management of the volume stopCh := make(chan struct{}) m.managedVolumes[volumeID] = stopCh - // Increase managed volume count for this driver - m.metrics.IncrementManagedVolumeCountTotal(m.nodeNameHash) return true } @@ -826,10 +817,6 @@ func (m *Manager) startRenewalRoutine(volumeID string) (started bool) { return false } - // Increase managed certificate count for this driver. - // We assume each volume will have one certificate to be managed. 
- m.metrics.IncrementManagedCertificateCountTotal(m.nodeNameHash) - // Create a context that will be cancelled when the stopCh is closed ctx, cancel := context.WithCancel(context.Background()) go func() { @@ -866,7 +853,9 @@ func (m *Manager) startRenewalRoutine(volumeID string) (started bool) { if err := m.issue(issueCtx, volumeID); err != nil { log.Error(err, "Failed to issue certificate, retrying after applying exponential backoff") // Increase issue error count - m.metrics.IncrementIssueErrorCountTotal(m.nodeNameHash, volumeID) + if m.metrics != nil { + m.metrics.IncrementIssueErrorCountTotal(m.nodeNameHash, volumeID) + } return false, nil } return true, nil @@ -906,14 +895,6 @@ func (m *Manager) UnmanageVolume(volumeID string) { if stopCh, ok := m.managedVolumes[volumeID]; ok { close(stopCh) delete(m.managedVolumes, volumeID) - if reqs, err := m.listAllRequestsForVolume(volumeID); err == nil { - // Remove the CertificateRequest from the metrics with the best effort. - for _, req := range reqs { - if req != nil { - m.metrics.RemoveCertificateRequest(req.Name, req.Namespace) - } - } - } } } @@ -959,19 +940,19 @@ func (m *Manager) Stop() { } } -// getExpiryAndDefaultNextIssuanceTime will return the certificate expiry time, together with -// default time at which the certificate should be renewed by the driver- 2/3rds through its -// lifetime (NotAfter - NotBefore). -func getExpiryAndDefaultNextIssuanceTime(chain []byte) (time.Time, time.Time, error) { +// calculateNextIssuanceTime will return the default time at which the certificate +// should be renewed by the driver- 2/3rds through its lifetime (NotAfter - +// NotBefore). 
+func calculateNextIssuanceTime(chain []byte) (time.Time, error) { block, _ := pem.Decode(chain) crt, err := x509.ParseCertificate(block.Bytes) if err != nil { - return time.Time{}, time.Time{}, fmt.Errorf("parsing issued certificate: %w", err) + return time.Time{}, fmt.Errorf("parsing issued certificate: %w", err) } actualDuration := crt.NotAfter.Sub(crt.NotBefore) renewBeforeNotAfter := actualDuration / 3 - return crt.NotAfter, crt.NotAfter.Add(-renewBeforeNotAfter), nil + return crt.NotAfter.Add(-renewBeforeNotAfter), nil } diff --git a/manager/manager_test.go b/manager/manager_test.go index 21bea15..c350be3 100644 --- a/manager/manager_test.go +++ b/manager/manager_test.go @@ -470,7 +470,7 @@ func TestManager_cleanupStaleRequests(t *testing.T) { } } -func Test_getExpiryAndDefaultNextIssuanceTime(t *testing.T) { +func Test_calculateNextIssuanceTime(t *testing.T) { notBefore := time.Date(1970, time.January, 1, 0, 0, 0, 0, time.UTC) notAfter := time.Date(1970, time.January, 4, 0, 0, 0, 0, time.UTC) pk, err := rsa.GenerateKey(rand.Reader, 2048) @@ -490,23 +490,20 @@ func Test_getExpiryAndDefaultNextIssuanceTime(t *testing.T) { certPEM := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: derBytes}) tests := map[string]struct { - expTime time.Time - renewTime time.Time - expErr bool + expTime time.Time + expErr bool }{ "if no attributes given, return 2/3rd certificate lifetime": { - expTime: notAfter, - renewTime: notBefore.AddDate(0, 0, 2), - expErr: false, + expTime: notBefore.AddDate(0, 0, 2), + expErr: false, }, } for name, test := range tests { t.Run(name, func(t *testing.T) { - expTime, renewTime, err := getExpiryAndDefaultNextIssuanceTime(certPEM) + renewTime, err := calculateNextIssuanceTime(certPEM) assert.Equal(t, test.expErr, err != nil) - assert.Equal(t, test.expTime, expTime) - assert.Equal(t, test.renewTime, renewTime) + assert.Equal(t, test.expTime, renewTime) }) } } diff --git a/metrics/certificaterequest.go b/metrics/certificaterequest.go 
deleted file mode 100644 index 40a91fc..0000000 --- a/metrics/certificaterequest.go +++ /dev/null @@ -1,102 +0,0 @@ -/* -Copyright 2024 The cert-manager Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package metrics - -import ( - "time" - - "github.com/prometheus/client_golang/prometheus" - - cmapi "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1" - cmmeta "github.com/cert-manager/cert-manager/pkg/apis/meta/v1" -) - -var readyConditionStatuses = [...]cmmeta.ConditionStatus{ - cmmeta.ConditionTrue, - cmmeta.ConditionFalse, - cmmeta.ConditionUnknown, -} - -// UpdateCertificateRequest will update the given CertificateRequest's metrics for its expiry, renewal, and status condition. 
-func (m *Metrics) UpdateCertificateRequest(cr *cmapi.CertificateRequest, exp, renewal time.Time) { - m.updateCertificateRequestExpiryAndRenewalTime(cr, exp, renewal) - m.updateCertificateRequestStatus(cr) -} - -// updateCertificateRequestExpiryAndRenewalTime updates the expiry and renewal time of a certificate request -func (m *Metrics) updateCertificateRequestExpiryAndRenewalTime(cr *cmapi.CertificateRequest, exp, renewal time.Time) { - expiryTime := 0.0 - if !exp.IsZero() { - expiryTime = float64(exp.Unix()) - } - m.certificateRequestExpiryTimeSeconds.With(prometheus.Labels{ - "name": cr.Name, - "namespace": cr.Namespace, - "issuer_name": cr.Spec.IssuerRef.Name, - "issuer_kind": cr.Spec.IssuerRef.Kind, - "issuer_group": cr.Spec.IssuerRef.Group}).Set(expiryTime) - - renewalTime := 0.0 - if !renewal.IsZero() { - renewalTime = float64(renewal.Unix()) - } - m.certificateRequestRenewalTimeSeconds.With(prometheus.Labels{ - "name": cr.Name, - "namespace": cr.Namespace, - "issuer_name": cr.Spec.IssuerRef.Name, - "issuer_kind": cr.Spec.IssuerRef.Kind, - "issuer_group": cr.Spec.IssuerRef.Group}).Set(renewalTime) -} - -// updateCertificateRequestStatus will update the metric for that Certificate Request -func (m *Metrics) updateCertificateRequestStatus(cr *cmapi.CertificateRequest) { - for _, c := range cr.Status.Conditions { - if c.Type == cmapi.CertificateRequestConditionReady { - m.updateCertificateRequestReadyStatus(cr, c.Status) - return - } - } - - // If no status condition set yet, set to Unknown - m.updateCertificateRequestReadyStatus(cr, cmmeta.ConditionUnknown) -} - -func (m *Metrics) updateCertificateRequestReadyStatus(cr *cmapi.CertificateRequest, current cmmeta.ConditionStatus) { - for _, condition := range readyConditionStatuses { - value := 0.0 - - if current == condition { - value = 1.0 - } - - m.certificateRequestReadyStatus.With(prometheus.Labels{ - "name": cr.Name, - "namespace": cr.Namespace, - "condition": string(condition), - "issuer_name": 
cr.Spec.IssuerRef.Name, - "issuer_kind": cr.Spec.IssuerRef.Kind, - "issuer_group": cr.Spec.IssuerRef.Group, - }).Set(value) - } -} - -// RemoveCertificateRequest will delete the CertificateRequest metrics from continuing to be exposed. -func (m *Metrics) RemoveCertificateRequest(name, namespace string) { - m.certificateRequestExpiryTimeSeconds.DeletePartialMatch(prometheus.Labels{"name": name, "namespace": namespace}) - m.certificateRequestRenewalTimeSeconds.DeletePartialMatch(prometheus.Labels{"name": name, "namespace": namespace}) - m.certificateRequestReadyStatus.DeletePartialMatch(prometheus.Labels{"name": name, "namespace": namespace}) -} diff --git a/metrics/certificaterequest_collector.go b/metrics/certificaterequest_collector.go new file mode 100644 index 0000000..6433f5b --- /dev/null +++ b/metrics/certificaterequest_collector.go @@ -0,0 +1,249 @@ +/* +Copyright 2025 The cert-manager Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package metrics + +import ( + "crypto/x509" + "encoding/pem" + "fmt" + "time" + + cmapi "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1" + cmmeta "github.com/cert-manager/cert-manager/pkg/apis/meta/v1" + cmlisters "github.com/cert-manager/cert-manager/pkg/client/listers/certmanager/v1" + "github.com/prometheus/client_golang/prometheus" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/selection" + + internalapi "github.com/cert-manager/csi-lib/internal/api" + internalapiutil "github.com/cert-manager/csi-lib/internal/api/util" + "github.com/cert-manager/csi-lib/storage" +) + +var ( + certRequestReadyConditionStatuses = [...]cmmeta.ConditionStatus{cmmeta.ConditionTrue, cmmeta.ConditionFalse, cmmeta.ConditionUnknown} + certRequestReadyStatusMetric = prometheus.NewDesc("certmanager_csi_certificate_request_ready_status", "The ready status of the certificate request.", []string{"name", "namespace", "condition", "issuer_name", "issuer_kind", "issuer_group"}, nil) + certRequestExpirationTimestampSeconds = prometheus.NewDesc("certmanager_csi_certificate_request_expiration_timestamp_seconds", "The timestamp after which the certificate request expires, expressed in Unix Epoch Time.", []string{"name", "namespace", "issuer_name", "issuer_kind", "issuer_group"}, nil) + certRequestRenewalTimestampSeconds = prometheus.NewDesc("certmanager_csi_certificate_request_renewal_timestamp_seconds", "The timestamp after which the certificate request should be renewed, expressed in Unix Epoch Time.", []string{"name", "namespace", "issuer_name", "issuer_kind", "issuer_group"}, nil) + managedVolumeCountTotal = prometheus.NewDesc("certmanager_csi_managed_volume_count_total", "The total number of managed volumes by the csi driver.", []string{"node"}, nil) + managedCertRequestCountTotal = prometheus.NewDesc("certmanager_csi_managed_certificate_request_count_total", "The total number of managed certificate requests by the csi driver.", []string{"node"}, nil) +) + 
+type CertificateRequestCollector struct { + nodeNameHash string + metadataReader storage.MetadataReader + certificateRequestLister cmlisters.CertificateRequestLister + certificateRequestReadyStatusMetric *prometheus.Desc + certificateRequestExpirationTimestampSeconds *prometheus.Desc + certificateRequestRenewalTimestampSeconds *prometheus.Desc + managedVolumeCountTotal *prometheus.Desc + managedCertificateRequestCountTotal *prometheus.Desc +} + +func NewCertificateRequestCollector(nodeNameHash string, metadataReader storage.MetadataReader, certificateRequestLister cmlisters.CertificateRequestLister) prometheus.Collector { + return &CertificateRequestCollector{ + nodeNameHash: nodeNameHash, + metadataReader: metadataReader, + certificateRequestLister: certificateRequestLister, + certificateRequestReadyStatusMetric: certRequestReadyStatusMetric, + certificateRequestExpirationTimestampSeconds: certRequestExpirationTimestampSeconds, + certificateRequestRenewalTimestampSeconds: certRequestRenewalTimestampSeconds, + managedVolumeCountTotal: managedVolumeCountTotal, + managedCertificateRequestCountTotal: managedCertRequestCountTotal, + } +} + +func (cc *CertificateRequestCollector) Describe(ch chan<- *prometheus.Desc) { + ch <- cc.certificateRequestReadyStatusMetric + ch <- cc.certificateRequestExpirationTimestampSeconds + ch <- cc.certificateRequestRenewalTimestampSeconds + ch <- cc.managedVolumeCountTotal + ch <- cc.managedCertificateRequestCountTotal +} + +func (cc *CertificateRequestCollector) Collect(ch chan<- prometheus.Metric) { + // Get the certificate requests from the lister, filtered by node selector + nodeSelector := labels.NewSelector() + req, err := labels.NewRequirement(internalapi.NodeIDHashLabelKey, selection.Equals, []string{cc.nodeNameHash}) + if err != nil { + return + } + nodeSelector = nodeSelector.Add(*req) + certRequestList, err := cc.certificateRequestLister.List(nodeSelector) + if err != nil { + return + } + 
cc.updateManagedCertificateRequestCount(len(certRequestList), ch) + + // Get the next issuance time map from the metadata reader + nextIssuanceTimeMap, err := cc.getNextIssuanceTimeMapFromMetadata() + if err != nil { + return + } + cc.updateManagedVolumeCount(len(nextIssuanceTimeMap), ch) // each volume has one nextIssuanceTime entry + + for _, cr := range certRequestList { + cc.updateCertificateRequestReadyStatus(cr, ch) + cc.updateCertificateRequestExpiry(cr, ch) + cc.updateCertificateRequestRenewalTime(cr, nextIssuanceTimeMap, ch) + } +} + +func (cc *CertificateRequestCollector) updateCertificateRequestReadyStatus(cr *cmapi.CertificateRequest, ch chan<- prometheus.Metric) { + setMetric := func(cr *cmapi.CertificateRequest, ch chan<- prometheus.Metric, status cmmeta.ConditionStatus) { + for _, condition := range certRequestReadyConditionStatuses { + value := 0.0 + + if status == condition { + value = 1.0 + } + + metric := prometheus.MustNewConstMetric( + cc.certificateRequestReadyStatusMetric, prometheus.GaugeValue, + value, + cr.Name, + cr.Namespace, + string(condition), + cr.Spec.IssuerRef.Name, + cr.Spec.IssuerRef.Kind, + cr.Spec.IssuerRef.Group, + ) + + ch <- metric + } + } + + for _, st := range cr.Status.Conditions { + if st.Type == cmapi.CertificateRequestConditionReady { + setMetric(cr, ch, st.Status) + return + } + } + + setMetric(cr, ch, cmmeta.ConditionUnknown) +} + +func (cc *CertificateRequestCollector) updateCertificateRequestExpiry(cr *cmapi.CertificateRequest, ch chan<- prometheus.Metric) { + expiryTime := 0.0 + + if cr.Status.Certificate != nil { + notAfter, err := getCertNotAfterTime(cr.Status.Certificate) + if err != nil { + return + } + expiryTime = float64(notAfter.Unix()) + } + + metric := prometheus.MustNewConstMetric( + cc.certificateRequestExpirationTimestampSeconds, + prometheus.GaugeValue, + expiryTime, + cr.Name, + cr.Namespace, + cr.Spec.IssuerRef.Name, + cr.Spec.IssuerRef.Kind, + cr.Spec.IssuerRef.Group, + ) + + ch <- metric +} + +// 
updateCertificateRequestRenewalTime updates the renewal time metric for the given certificate request. +// The renewal time is the time at which the volume should be renewed. +// Note: there might be multiple certificate requests for a volume depending on the MaxRequestsPerVolume value, +// but only the latest one will be stored in the nextIssuanceTimeMap. +func (cc *CertificateRequestCollector) updateCertificateRequestRenewalTime(cr *cmapi.CertificateRequest, nextIssuanceTimeMap map[string]time.Time, ch chan<- prometheus.Metric) { + renewalTime := 0.0 + + if len(cr.Labels) != 0 { + if nextIssuanceTime, ok := nextIssuanceTimeMap[cr.Labels[internalapi.VolumeIDHashLabelKey]]; ok { + renewalTime = float64(nextIssuanceTime.Unix()) + } + } + + metric := prometheus.MustNewConstMetric( + cc.certificateRequestRenewalTimestampSeconds, + prometheus.GaugeValue, + renewalTime, + cr.Name, + cr.Namespace, + cr.Spec.IssuerRef.Name, + cr.Spec.IssuerRef.Kind, + cr.Spec.IssuerRef.Group, + ) + + ch <- metric +} + +// getCertNotAfterTime returns the NotAfter time of the issued certificate. +// It expects the certificate to be encoded in PEM format. +func getCertNotAfterTime(certBytes []byte) (time.Time, error) { + block, _ := pem.Decode(certBytes) + if block == nil { + return time.Time{}, fmt.Errorf("invalid PEM data: could not decode certificate") + } + crt, err := x509.ParseCertificate(block.Bytes) + if err != nil { + return time.Time{}, fmt.Errorf("parsing issued certificate: %w", err) + } + + return crt.NotAfter, nil +} + +// getNextIssuanceTimeMapFromMetadata returns a map of volume ID hashes to the next issuance time. +// The map is keyed by the volume ID hash. +// The next issuance time is the time at which the volume should be renewed. 
+func (cc *CertificateRequestCollector) getNextIssuanceTimeMapFromMetadata() (map[string]time.Time, error) { + volumeIDs, err := cc.metadataReader.ListVolumes() + if err != nil { + return nil, fmt.Errorf("listing volumes: %w", err) + } + + nextIssuanceTimeMap := make(map[string]time.Time, len(volumeIDs)) + for _, id := range volumeIDs { + volumeMetadata, err := cc.metadataReader.ReadMetadata(id) + if err != nil { + return nil, err + } + if volumeMetadata.NextIssuanceTime != nil { + nextIssuanceTimeMap[internalapiutil.HashIdentifier(id)] = *volumeMetadata.NextIssuanceTime + } + } + return nextIssuanceTimeMap, nil +} + +func (cc *CertificateRequestCollector) updateManagedVolumeCount(count int, ch chan<- prometheus.Metric) { + metric := prometheus.MustNewConstMetric( + cc.managedVolumeCountTotal, + prometheus.CounterValue, + float64(count), + cc.nodeNameHash, + ) + + ch <- metric +} + +func (cc *CertificateRequestCollector) updateManagedCertificateRequestCount(count int, ch chan<- prometheus.Metric) { + metric := prometheus.MustNewConstMetric( + cc.managedCertificateRequestCountTotal, + prometheus.CounterValue, + float64(count), + cc.nodeNameHash, + ) + + ch <- metric +} diff --git a/metrics/certificaterequest_test.go b/metrics/certificaterequest_test.go index 1ec7f5b..be51e04 100644 --- a/metrics/certificaterequest_test.go +++ b/metrics/certificaterequest_test.go @@ -21,24 +21,31 @@ import ( "testing" "time" + cmapi "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1" + cmmeta "github.com/cert-manager/cert-manager/pkg/apis/meta/v1" + "github.com/cert-manager/cert-manager/pkg/client/clientset/versioned/fake" + "github.com/cert-manager/cert-manager/pkg/client/informers/externalversions" + testcrypto "github.com/cert-manager/cert-manager/test/unit/crypto" + "github.com/cert-manager/cert-manager/test/unit/gen" "github.com/go-logr/logr/testr" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/testutil" + 
"github.com/stretchr/testify/assert" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - cmapi "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1" - cmmeta "github.com/cert-manager/cert-manager/pkg/apis/meta/v1" - testcrypto "github.com/cert-manager/cert-manager/test/unit/crypto" - "github.com/cert-manager/cert-manager/test/unit/gen" + internalapi "github.com/cert-manager/csi-lib/internal/api" + internalapiutil "github.com/cert-manager/csi-lib/internal/api/util" + "github.com/cert-manager/csi-lib/metadata" + "github.com/cert-manager/csi-lib/storage" ) const expiryMetadata = ` - # HELP certmanager_csi_certificate_request_expiration_timestamp_seconds The date after which the certificate request expires. Expressed as a Unix Epoch Time. + # HELP certmanager_csi_certificate_request_expiration_timestamp_seconds The timestamp after which the certificate request expires, expressed in Unix Epoch Time. # TYPE certmanager_csi_certificate_request_expiration_timestamp_seconds gauge ` const renewalTimeMetadata = ` - # HELP certmanager_csi_certificate_request_renewal_timestamp_seconds The number of seconds before expiration time the certificate request should renew. + # HELP certmanager_csi_certificate_request_renewal_timestamp_seconds The timestamp after which the certificate request should be renewed, expressed in Unix Epoch Time. 
# TYPE certmanager_csi_certificate_request_renewal_timestamp_seconds gauge ` @@ -48,9 +55,25 @@ const readyMetadata = ` ` func TestCertificateRequestMetrics(t *testing.T) { + testNodeName := "test-node-name" + testVolumeID := "test-vol-id" + + // private key to be used to generate X509 certificate + privKey := testcrypto.MustCreatePEMPrivateKey(t) + certTemplate := &cmapi.Certificate{ + ObjectMeta: metav1.ObjectMeta{Namespace: "test-ns", Name: "test-cert"}, + Spec: cmapi.CertificateSpec{ + CommonName: "test.example.com", + }, + } + notBefore := time.Unix(0, 0) + notAfter := time.Unix(100, 0) + testCert := testcrypto.MustCreateCertWithNotBeforeAfter(t, privKey, certTemplate, notBefore, notAfter) + renew := time.Unix(50, 0) + type testT struct { cr *cmapi.CertificateRequest - notAfter, renewBefore time.Time + meta metadata.Metadata expectedExpiry, expectedReady, expectedRenewalTime string } tests := map[string]testT{ @@ -66,11 +89,11 @@ func TestCertificateRequestMetrics(t *testing.T) { Type: cmapi.CertificateRequestConditionReady, Status: cmmeta.ConditionTrue, }), + gen.SetCertificateRequestCertificate(testCert), ), - notAfter: time.Unix(2208988804, 0), expectedExpiry: ` - certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 2.208988804e+09 + certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 100 `, expectedReady: ` certmanager_csi_certificate_request_ready_status{condition="False",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 0 @@ -115,8 +138,8 @@ func TestCertificateRequestMetrics(t *testing.T) { Type: cmapi.CertificateRequestConditionReady, Status: 
cmmeta.ConditionFalse, }), + gen.SetCertificateRequestCertificate(testCert), ), - notAfter: time.Unix(100, 0), expectedExpiry: ` certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 100 @@ -142,11 +165,11 @@ func TestCertificateRequestMetrics(t *testing.T) { Type: cmapi.CertificateRequestConditionReady, Status: cmmeta.ConditionUnknown, }), + gen.SetCertificateRequestCertificate(testCert), ), - notAfter: time.Unix(99999, 0), expectedExpiry: ` - certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 99999 + certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 100 `, expectedReady: ` certmanager_csi_certificate_request_ready_status{condition="False",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 0 @@ -169,12 +192,15 @@ func TestCertificateRequestMetrics(t *testing.T) { Type: cmapi.CertificateRequestConditionReady, Status: cmmeta.ConditionTrue, }), + gen.SetCertificateRequestCertificate(testCert), ), - notAfter: time.Unix(2208988804, 0), - renewBefore: time.Unix(2108988804, 0), + meta: metadata.Metadata{ + VolumeID: testVolumeID, + NextIssuanceTime: &renew, + }, expectedExpiry: ` - certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 2.208988804e+09 + 
certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 100 `, expectedReady: ` certmanager_csi_certificate_request_ready_status{condition="False",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 0 @@ -182,7 +208,7 @@ func TestCertificateRequestMetrics(t *testing.T) { certmanager_csi_certificate_request_ready_status{condition="Unknown",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 0 `, expectedRenewalTime: ` - certmanager_csi_certificate_request_renewal_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 2.108988804e+09 + certmanager_csi_certificate_request_renewal_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-certificate-request",namespace="test-ns"} 50 `, }, } @@ -190,35 +216,50 @@ func TestCertificateRequestMetrics(t *testing.T) { t.Run(n, func(t *testing.T) { testLog := testr.New(t) m := New(&testLog, prometheus.NewRegistry()) - m.UpdateCertificateRequest(test.cr, test.notAfter, test.renewBefore) - if err := testutil.CollectAndCompare(m.certificateRequestExpiryTimeSeconds, + fakeClient := fake.NewSimpleClientset() + factory := externalversions.NewSharedInformerFactory(fakeClient, 0) + certRequestInformer := factory.Certmanager().V1().CertificateRequests() + test.cr.Labels = map[string]string{ + internalapi.NodeIDHashLabelKey: internalapiutil.HashIdentifier(testNodeName), + internalapi.VolumeIDHashLabelKey: internalapiutil.HashIdentifier(testVolumeID), + } + err := certRequestInformer.Informer().GetIndexer().Add(test.cr) + assert.NoError(t, err) + 
fakeMetadata := storage.NewMemoryFS() + fakeMetadata.RegisterMetadata(test.meta) + m.SetupCertificateRequestCollector(internalapiutil.HashIdentifier(testNodeName), fakeMetadata, certRequestInformer.Lister()) + + if err := testutil.CollectAndCompare(m.certificateRequestCollector, strings.NewReader(expiryMetadata+test.expectedExpiry), "certmanager_csi_certificate_request_expiration_timestamp_seconds", ); err != nil { t.Errorf("unexpected collecting result:\n%s", err) } - if err := testutil.CollectAndCompare(m.certificateRequestRenewalTimeSeconds, + if err := testutil.CollectAndCompare(m.certificateRequestCollector, strings.NewReader(renewalTimeMetadata+test.expectedRenewalTime), "certmanager_csi_certificate_request_renewal_timestamp_seconds", ); err != nil { t.Errorf("unexpected collecting result:\n%s", err) } - if err := testutil.CollectAndCompare(m.certificateRequestReadyStatus, + if err := testutil.CollectAndCompare(m.certificateRequestCollector, strings.NewReader(readyMetadata+test.expectedReady), "certmanager_csi_certificate_request_ready_status", ); err != nil { t.Errorf("unexpected collecting result:\n%s", err) } + + err = certRequestInformer.Informer().GetIndexer().Delete(test.cr) + assert.NoError(t, err) }) } } func TestCertificateRequestCache(t *testing.T) { - testLog := testr.New(t) - m := New(&testLog, prometheus.NewRegistry()) + testNodeName := "test-node-name" + testNodeNameHash := internalapiutil.HashIdentifier(testNodeName) // private key to be used to generate X509 certificate privKey := testcrypto.MustCreatePEMPrivateKey(t) @@ -277,13 +318,47 @@ func TestCertificateRequestCache(t *testing.T) { testcrypto.MustCreateCertWithNotBeforeAfter(t, privKey, certTemplate, notBefore, notAfter3)), ) - // Observe all three Certificate metrics - m.UpdateCertificateRequest(cr1, notAfter1, renew1) - m.UpdateCertificateRequest(cr2, notAfter2, renew2) - m.UpdateCertificateRequest(cr3, notAfter3, renew3) + cr1.Labels = map[string]string{ + 
internalapi.NodeIDHashLabelKey: testNodeNameHash, + internalapi.VolumeIDHashLabelKey: internalapiutil.HashIdentifier("vol-1"), + } + cr2.Labels = map[string]string{ + internalapi.NodeIDHashLabelKey: testNodeNameHash, + internalapi.VolumeIDHashLabelKey: internalapiutil.HashIdentifier("vol-2"), + } + cr3.Labels = map[string]string{ + internalapi.NodeIDHashLabelKey: testNodeNameHash, + internalapi.VolumeIDHashLabelKey: internalapiutil.HashIdentifier("vol-3"), + } + + fakeMetadata := storage.NewMemoryFS() + fakeMetadata.RegisterMetadata(metadata.Metadata{ + VolumeID: "vol-1", NextIssuanceTime: &renew1, + }) + fakeMetadata.RegisterMetadata(metadata.Metadata{ + VolumeID: "vol-2", NextIssuanceTime: &renew2, + }) + fakeMetadata.RegisterMetadata(metadata.Metadata{ + VolumeID: "vol-3", NextIssuanceTime: &renew3, + }) + + fakeClient := fake.NewSimpleClientset() + factory := externalversions.NewSharedInformerFactory(fakeClient, 0) + certRequestInformer := factory.Certmanager().V1().CertificateRequests() + + err := certRequestInformer.Informer().GetIndexer().Add(cr1) + assert.NoError(t, err) + err = certRequestInformer.Informer().GetIndexer().Add(cr2) + assert.NoError(t, err) + err = certRequestInformer.Informer().GetIndexer().Add(cr3) + assert.NoError(t, err) + + testLog := testr.New(t) + m := New(&testLog, prometheus.NewRegistry()) + m.SetupCertificateRequestCollector(testNodeNameHash, fakeMetadata, certRequestInformer.Lister()) // Check all three metrics exist - if err := testutil.CollectAndCompare(m.certificateRequestReadyStatus, + if err := testutil.CollectAndCompare(m.certificateRequestCollector, strings.NewReader(readyMetadata+` certmanager_csi_certificate_request_ready_status{condition="False",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr1",namespace="testns"} 0 
certmanager_csi_certificate_request_ready_status{condition="False",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr2",namespace="testns"} 0 @@ -299,7 +374,7 @@ func TestCertificateRequestCache(t *testing.T) { ); err != nil { t.Errorf("unexpected collecting result:\n%s", err) } - if err := testutil.CollectAndCompare(m.certificateRequestExpiryTimeSeconds, + if err := testutil.CollectAndCompare(m.certificateRequestCollector, strings.NewReader(expiryMetadata+` certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr1",namespace="testns"} 100 certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr2",namespace="testns"} 200 @@ -310,7 +385,7 @@ func TestCertificateRequestCache(t *testing.T) { t.Errorf("unexpected collecting result:\n%s", err) } - if err := testutil.CollectAndCompare(m.certificateRequestRenewalTimeSeconds, + if err := testutil.CollectAndCompare(m.certificateRequestCollector, strings.NewReader(renewalTimeMetadata+` certmanager_csi_certificate_request_renewal_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr1",namespace="testns"} 50 certmanager_csi_certificate_request_renewal_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr2",namespace="testns"} 150 @@ -322,8 +397,10 @@ func TestCertificateRequestCache(t *testing.T) { } // Remove second certificate and check not exists - m.RemoveCertificateRequest("cr2", "testns") - if err := testutil.CollectAndCompare(m.certificateRequestReadyStatus, + err = certRequestInformer.Informer().GetIndexer().Delete(cr2) + assert.NoError(t, err) + + if err := testutil.CollectAndCompare(m.certificateRequestCollector, 
strings.NewReader(readyMetadata+` certmanager_csi_certificate_request_ready_status{condition="False",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr1",namespace="testns"} 0 certmanager_csi_certificate_request_ready_status{condition="False",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr3",namespace="testns"} 1 @@ -336,7 +413,7 @@ func TestCertificateRequestCache(t *testing.T) { ); err != nil { t.Errorf("unexpected collecting result:\n%s", err) } - if err := testutil.CollectAndCompare(m.certificateRequestExpiryTimeSeconds, + if err := testutil.CollectAndCompare(m.certificateRequestCollector, strings.NewReader(expiryMetadata+` certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr1",namespace="testns"} 100 certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr3",namespace="testns"} 300 @@ -345,15 +422,31 @@ func TestCertificateRequestCache(t *testing.T) { ); err != nil { t.Errorf("unexpected collecting result:\n%s", err) } + if err := testutil.CollectAndCompare(m.certificateRequestCollector, + strings.NewReader(renewalTimeMetadata+` + certmanager_csi_certificate_request_renewal_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr1",namespace="testns"} 50 + certmanager_csi_certificate_request_renewal_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="cr3",namespace="testns"} 250 +`), + "certmanager_csi_certificate_request_renewal_timestamp_seconds", + ); err != nil { + t.Errorf("unexpected collecting result:\n%s", err) + } // Remove all Certificates (even is already removed) and observe no Certificates - 
m.RemoveCertificateRequest("cr1", "testns") - m.RemoveCertificateRequest("cr2", "testns") - m.RemoveCertificateRequest("cr3", "testns") - if testutil.CollectAndCount(m.certificateRequestReadyStatus, "certmanager_csi_certificate_request_ready_status") != 0 { + err = certRequestInformer.Informer().GetIndexer().Delete(cr1) + assert.NoError(t, err) + err = certRequestInformer.Informer().GetIndexer().Delete(cr2) + assert.NoError(t, err) + err = certRequestInformer.Informer().GetIndexer().Delete(cr3) + assert.NoError(t, err) + + if testutil.CollectAndCount(m.certificateRequestCollector, "certmanager_csi_certificate_request_ready_status") != 0 { + t.Errorf("unexpected collecting result") + } + if testutil.CollectAndCount(m.certificateRequestCollector, "certmanager_csi_certificate_request_expiration_timestamp_seconds") != 0 { t.Errorf("unexpected collecting result") } - if testutil.CollectAndCount(m.certificateRequestExpiryTimeSeconds, "certmanager_csi_certificate_request_expiration_timestamp_seconds") != 0 { + if testutil.CollectAndCount(m.certificateRequestCollector, "certmanager_csi_certificate_request_renewal_timestamp_seconds") != 0 { t.Errorf("unexpected collecting result") } } diff --git a/metrics/metrics.go b/metrics/metrics.go index 47bbc2f..d14c808 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -19,9 +19,12 @@ package metrics import ( "net/http" + cmlisters "github.com/cert-manager/cert-manager/pkg/client/listers/certmanager/v1" "github.com/go-logr/logr" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" + + "github.com/cert-manager/csi-lib/storage" ) const ( @@ -35,48 +38,15 @@ type Metrics struct { log logr.Logger registry *prometheus.Registry - certificateRequestExpiryTimeSeconds *prometheus.GaugeVec - certificateRequestRenewalTimeSeconds *prometheus.GaugeVec - certificateRequestReadyStatus *prometheus.GaugeVec - driverIssueCallCountTotal *prometheus.CounterVec - driverIssueErrorCountTotal 
*prometheus.CounterVec - managedVolumeCountTotal *prometheus.CounterVec - managedCertificateCountTotal *prometheus.CounterVec + driverIssueCallCountTotal *prometheus.CounterVec + driverIssueErrorCountTotal *prometheus.CounterVec + certificateRequestCollector prometheus.Collector } // New creates a Metrics struct and populates it with prometheus metric types. func New(logger *logr.Logger, registry *prometheus.Registry) *Metrics { var ( - certificateRequestExpiryTimeSeconds = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: subsystem, - Name: "certificate_request_expiration_timestamp_seconds", - Help: "The date after which the certificate request expires. Expressed as a Unix Epoch Time.", - }, - []string{"name", "namespace", "issuer_name", "issuer_kind", "issuer_group"}, - ) - - certificateRequestRenewalTimeSeconds = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: subsystem, - Name: "certificate_request_renewal_timestamp_seconds", - Help: "The number of seconds before expiration time the certificate request should renew.", - }, - []string{"name", "namespace", "issuer_name", "issuer_kind", "issuer_group"}, - ) - - certificateRequestReadyStatus = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: subsystem, - Name: "certificate_request_ready_status", - Help: "The ready status of the certificate request.", - }, - []string{"name", "namespace", "condition", "issuer_name", "issuer_kind", "issuer_group"}, - ) - + // driverIssueCallCountTotal is a Prometheus counter for the number of issue() calls made by the driver. driverIssueCallCountTotal = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: namespace, @@ -87,6 +57,8 @@ func New(logger *logr.Logger, registry *prometheus.Registry) *Metrics { []string{"node", "volume"}, ) + // driverIssueErrorCountTotal is a Prometheus counter for the number of errors encountered + // during the driver issue() calls. 
driverIssueErrorCountTotal = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: namespace, @@ -96,26 +68,11 @@ func New(logger *logr.Logger, registry *prometheus.Registry) *Metrics { }, []string{"node", "volume"}, ) + ) - managedVolumeCountTotal = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: subsystem, - Name: "managed_volume_count_total", - Help: "The number of volume managed by the csi driver.", - }, - []string{"node"}, - ) - - managedCertificateCountTotal = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: subsystem, - Name: "managed_certificate_count_total", - Help: "The number of certificates managed by the csi driver.", - }, - []string{"node"}, - ) + registry.MustRegister( + driverIssueCallCountTotal, + driverIssueErrorCountTotal, ) // Create server and register Prometheus metrics handler @@ -123,23 +80,10 @@ func New(logger *logr.Logger, registry *prometheus.Registry) *Metrics { log: logger.WithName("metrics"), registry: registry, - certificateRequestExpiryTimeSeconds: certificateRequestExpiryTimeSeconds, - certificateRequestRenewalTimeSeconds: certificateRequestRenewalTimeSeconds, - certificateRequestReadyStatus: certificateRequestReadyStatus, - driverIssueCallCountTotal: driverIssueCallCountTotal, - driverIssueErrorCountTotal: driverIssueErrorCountTotal, - managedVolumeCountTotal: managedVolumeCountTotal, - managedCertificateCountTotal: managedCertificateCountTotal, + driverIssueCallCountTotal: driverIssueCallCountTotal, + driverIssueErrorCountTotal: driverIssueErrorCountTotal, } - m.registry.MustRegister(m.certificateRequestExpiryTimeSeconds) - m.registry.MustRegister(m.certificateRequestRenewalTimeSeconds) - m.registry.MustRegister(m.certificateRequestReadyStatus) - m.registry.MustRegister(m.driverIssueCallCountTotal) - m.registry.MustRegister(m.driverIssueErrorCountTotal) - m.registry.MustRegister(m.managedVolumeCountTotal) - 
m.registry.MustRegister(m.managedCertificateCountTotal) - return m } @@ -151,6 +95,11 @@ func (m *Metrics) DefaultHandler() http.Handler { return mux } +func (m *Metrics) SetupCertificateRequestCollector(nodeNameHash string, metadataReader storage.MetadataReader, certificateRequestLister cmlisters.CertificateRequestLister) { + m.certificateRequestCollector = NewCertificateRequestCollector(nodeNameHash, metadataReader, certificateRequestLister) + m.registry.MustRegister(m.certificateRequestCollector) +} + // IncrementIssueCallCountTotal will increase the issue call counter for the driver. func (m *Metrics) IncrementIssueCallCountTotal(nodeNameHash, volumeID string) { m.driverIssueCallCountTotal.WithLabelValues(nodeNameHash, volumeID).Inc() @@ -160,13 +109,3 @@ func (m *Metrics) IncrementIssueCallCountTotal(nodeNameHash, volumeID string) { func (m *Metrics) IncrementIssueErrorCountTotal(nodeNameHash, volumeID string) { m.driverIssueErrorCountTotal.WithLabelValues(nodeNameHash, volumeID).Inc() } - -// IncrementManagedVolumeCountTotal will increase the managed volume counter for the driver. -func (m *Metrics) IncrementManagedVolumeCountTotal(nodeNameHash string) { - m.managedVolumeCountTotal.WithLabelValues(nodeNameHash).Inc() -} - -// IncrementManagedCertificateCountTotal will increase the managed certificate count for the driver. 
-func (m *Metrics) IncrementManagedCertificateCountTotal(nodeNameHash string) { - m.managedCertificateCountTotal.WithLabelValues(nodeNameHash).Inc() -} diff --git a/test/integration/metrics_test.go b/test/integration/metrics_test.go index fc6c9f2..be18f9f 100644 --- a/test/integration/metrics_test.go +++ b/test/integration/metrics_test.go @@ -29,6 +29,11 @@ import ( "testing" "time" + cmapi "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1" + cmmeta "github.com/cert-manager/cert-manager/pkg/apis/meta/v1" + "github.com/cert-manager/cert-manager/pkg/client/clientset/versioned/fake" + "github.com/cert-manager/cert-manager/pkg/client/informers/externalversions" + testcrypto "github.com/cert-manager/cert-manager/test/unit/crypto" "github.com/container-storage-interface/spec/lib/go/csi" "github.com/go-logr/logr/testr" "github.com/prometheus/client_golang/prometheus" @@ -36,9 +41,7 @@ import ( "k8s.io/apimachinery/pkg/util/wait" fakeclock "k8s.io/utils/clock/testing" - cmapi "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1" - cmmeta "github.com/cert-manager/cert-manager/pkg/apis/meta/v1" - testcrypto "github.com/cert-manager/cert-manager/test/unit/crypto" + internalapiutil "github.com/cert-manager/csi-lib/internal/api/util" "github.com/cert-manager/csi-lib/manager" "github.com/cert-manager/csi-lib/metadata" "github.com/cert-manager/csi-lib/metrics" @@ -95,9 +98,20 @@ func TestMetricsServer(t *testing.T) { testLog := testr.New(t) testNamespace := "test-ns" + testNodeId := "test-node" // Build metrics handler, and start metrics server with a random available port metricsHandler := metrics.New(&testLog, prometheus.NewRegistry()) + store := storage.NewMemoryFS() + fakeClient := fake.NewSimpleClientset() + // client-go imposes a minimum resync period of 1 second, so that is the lowest we can go + // https://github.com/kubernetes/client-go/blob/5a019202120ab4dd7dfb3788e5cb87269f343ebe/tools/cache/shared_informer.go#L575 + factory := 
externalversions.NewSharedInformerFactory(fakeClient, time.Second) + certRequestInformer := factory.Certmanager().V1().CertificateRequests() + metricsHandler.SetupCertificateRequestCollector(internalapiutil.HashIdentifier(testNodeId), store, certRequestInformer.Lister()) + factory.Start(ctx.Done()) + factory.WaitForCacheSync(ctx.Done()) + // listenConfig listenConfig := &net.ListenConfig{} metricsLn, err := listenConfig.Listen(ctx, "tcp", "127.0.0.1:0") @@ -135,12 +149,13 @@ func TestMetricsServer(t *testing.T) { }() // Build and start the driver - store := storage.NewMemoryFS() clock := fakeclock.NewFakeClock(time.Now()) opts, cl, stop := testdriver.Run(t, testdriver.Options{ Store: store, Clock: clock, Metrics: metricsHandler, + Client: fakeClient, + NodeID: testNodeId, Log: &testLog, GeneratePrivateKey: func(meta metadata.Metadata) (crypto.PrivateKey, error) { return nil, nil @@ -163,7 +178,7 @@ func TestMetricsServer(t *testing.T) { "ca": ca, "cert": chain, }) - nextIssuanceTime := clock.Now().Add(time.Hour) + nextIssuanceTime := time.Unix(200, 0) meta.NextIssuanceTime = &nextIssuanceTime return store.WriteMetadata(meta.VolumeID, meta) }, @@ -172,7 +187,13 @@ func TestMetricsServer(t *testing.T) { // Should expose no additional metrics metricsEndpoint := fmt.Sprintf("http://%s/metrics", metricsServer.Addr) - waitForMetrics(t, ctx, metricsEndpoint, "") + waitForMetrics(t, ctx, metricsEndpoint, `# HELP certmanager_csi_managed_certificate_request_count_total The total number of managed certificate requests by the csi driver. +# TYPE certmanager_csi_managed_certificate_request_count_total counter +certmanager_csi_managed_certificate_request_count_total{node="f56fd9f8b"} 0 +# HELP certmanager_csi_managed_volume_count_total The total number of managed volumes by the csi driver. 
+# TYPE certmanager_csi_managed_volume_count_total counter +certmanager_csi_managed_volume_count_total{node="f56fd9f8b"} 0 +`) // Create a self-signed Certificate and wait for it to be issued privKey := testcrypto.MustCreatePEMPrivateKey(t) @@ -213,8 +234,8 @@ func TestMetricsServer(t *testing.T) { } // Should expose that CertificateRequest as ready with expiry and renewal time - // node="f56fd9f8b" is the hash value of "test-node" defined in driver_testing.go - expectedOutputTemplate := `# HELP certmanager_csi_certificate_request_expiration_timestamp_seconds The date after which the certificate request expires. Expressed as a Unix Epoch Time. + // node="f56fd9f8b" is the hash value of "test-node" + expectedOutputTemplate := `# HELP certmanager_csi_certificate_request_expiration_timestamp_seconds The timestamp after which the certificate request expires, expressed in Unix Epoch Time. # TYPE certmanager_csi_certificate_request_expiration_timestamp_seconds gauge certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-cr-name",namespace="test-ns"} 300 # HELP certmanager_csi_certificate_request_ready_status The ready status of the certificate request. 
@@ -222,16 +243,16 @@ certmanager_csi_certificate_request_expiration_timestamp_seconds{issuer_group="t certmanager_csi_certificate_request_ready_status{condition="False",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-cr-name",namespace="test-ns"} 0 certmanager_csi_certificate_request_ready_status{condition="True",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-cr-name",namespace="test-ns"} 1 certmanager_csi_certificate_request_ready_status{condition="Unknown",issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-cr-name",namespace="test-ns"} 0 -# HELP certmanager_csi_certificate_request_renewal_timestamp_seconds The number of seconds before expiration time the certificate request should renew. +# HELP certmanager_csi_certificate_request_renewal_timestamp_seconds The timestamp after which the certificate request should be renewed, expressed in Unix Epoch Time. # TYPE certmanager_csi_certificate_request_renewal_timestamp_seconds gauge certmanager_csi_certificate_request_renewal_timestamp_seconds{issuer_group="test-issuer-group",issuer_kind="test-issuer-kind",issuer_name="test-issuer",name="test-cr-name",namespace="test-ns"} 200 # HELP certmanager_csi_driver_issue_call_count_total The number of issue() calls made by the driver. # TYPE certmanager_csi_driver_issue_call_count_total counter certmanager_csi_driver_issue_call_count_total{node="f56fd9f8b",volume="test-vol"} 1 -# HELP certmanager_csi_managed_certificate_count_total The number of certificates managed by the csi driver. -# TYPE certmanager_csi_managed_certificate_count_total counter -certmanager_csi_managed_certificate_count_total{node="f56fd9f8b"} 1 -# HELP certmanager_csi_managed_volume_count_total The number of volume managed by the csi driver. 
+# HELP certmanager_csi_managed_certificate_request_count_total The total number of managed certificate requests by the csi driver. +# TYPE certmanager_csi_managed_certificate_request_count_total counter +certmanager_csi_managed_certificate_request_count_total{node="f56fd9f8b"} 1 +# HELP certmanager_csi_managed_volume_count_total The total number of managed volumes by the csi driver. # TYPE certmanager_csi_managed_volume_count_total counter certmanager_csi_managed_volume_count_total{node="f56fd9f8b"} 1 ` @@ -245,17 +266,21 @@ certmanager_csi_managed_volume_count_total{node="f56fd9f8b"} 1 if err != nil { t.Fatal(err) } + err = opts.Client.CertmanagerV1().CertificateRequests(testNamespace).Delete(ctx, req.Name, metav1.DeleteOptions{}) + if err != nil { + t.Fatal(err) + } // Should expose no CertificateRequest and only metrics counters waitForMetrics(t, ctx, metricsEndpoint, `# HELP certmanager_csi_driver_issue_call_count_total The number of issue() calls made by the driver. # TYPE certmanager_csi_driver_issue_call_count_total counter certmanager_csi_driver_issue_call_count_total{node="f56fd9f8b",volume="test-vol"} 1 -# HELP certmanager_csi_managed_certificate_count_total The number of certificates managed by the csi driver. -# TYPE certmanager_csi_managed_certificate_count_total counter -certmanager_csi_managed_certificate_count_total{node="f56fd9f8b"} 1 -# HELP certmanager_csi_managed_volume_count_total The number of volume managed by the csi driver. +# HELP certmanager_csi_managed_certificate_request_count_total The total number of managed certificate requests by the csi driver. +# TYPE certmanager_csi_managed_certificate_request_count_total counter +certmanager_csi_managed_certificate_request_count_total{node="f56fd9f8b"} 0 +# HELP certmanager_csi_managed_volume_count_total The total number of managed volumes by the csi driver. 
# TYPE certmanager_csi_managed_volume_count_total counter -certmanager_csi_managed_volume_count_total{node="f56fd9f8b"} 1 +certmanager_csi_managed_volume_count_total{node="f56fd9f8b"} 0 `) } From 11cc5138bdcf908d7aa2e1e5a538f809f20aad7a Mon Sep 17 00:00:00 2001 From: Jing Liu Date: Tue, 16 Sep 2025 14:19:28 -0700 Subject: [PATCH 6/7] Add metric example to simple-csi Signed-off-by: Jing Liu --- examples/simple/go.mod | 5 +- examples/simple/go.sum | 2 + examples/simple/main.go | 80 +++++++++++++++++++++++++++++- metrics/certificaterequest_test.go | 4 +- metrics/metrics.go | 5 +- test/integration/metrics_test.go | 3 +- 6 files changed, 89 insertions(+), 10 deletions(-) diff --git a/examples/simple/go.mod b/examples/simple/go.mod index e01a2bf..62dfd8c 100644 --- a/examples/simple/go.mod +++ b/examples/simple/go.mod @@ -7,6 +7,9 @@ replace github.com/cert-manager/csi-lib => ../../ require ( github.com/cert-manager/cert-manager v1.19.0 github.com/cert-manager/csi-lib v0.0.0-00010101000000-000000000000 + github.com/go-logr/logr v1.4.3 + github.com/prometheus/client_golang v1.23.2 + golang.org/x/sync v0.17.0 k8s.io/client-go v0.34.1 k8s.io/klog/v2 v2.130.1 k8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d @@ -23,7 +26,6 @@ require ( github.com/fxamacker/cbor/v2 v2.9.0 // indirect github.com/go-asn1-ber/asn1-ber v1.5.8-0.20250403174932-29230038a667 // indirect github.com/go-ldap/ldap/v3 v3.4.12 // indirect - github.com/go-logr/logr v1.4.3 // indirect github.com/go-logr/zapr v1.3.0 // indirect github.com/go-openapi/jsonpointer v0.22.1 // indirect github.com/go-openapi/jsonreference v0.21.2 // indirect @@ -43,7 +45,6 @@ require ( github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect - github.com/prometheus/client_golang v1.23.2 // indirect github.com/prometheus/client_model v0.6.2 // indirect 
github.com/prometheus/common v0.66.1 // indirect github.com/prometheus/procfs v0.17.0 // indirect diff --git a/examples/simple/go.sum b/examples/simple/go.sum index 496c929..4549f63 100644 --- a/examples/simple/go.sum +++ b/examples/simple/go.sum @@ -171,6 +171,8 @@ golang.org/x/oauth2 v0.31.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwE golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug= +golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/examples/simple/main.go b/examples/simple/main.go index d2feb08..ceb1a68 100644 --- a/examples/simple/main.go +++ b/examples/simple/main.go @@ -28,6 +28,7 @@ import ( "flag" "fmt" "net" + "net/http" "net/url" "strings" "time" @@ -35,7 +36,11 @@ import ( cmapi "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1" cmmeta "github.com/cert-manager/cert-manager/pkg/apis/meta/v1" cmclient "github.com/cert-manager/cert-manager/pkg/client/clientset/versioned" + "github.com/cert-manager/cert-manager/pkg/client/informers/externalversions" "github.com/cert-manager/cert-manager/pkg/util/pki" + "github.com/go-logr/logr" + "github.com/prometheus/client_golang/prometheus" + "golang.org/x/sync/errgroup" "k8s.io/client-go/rest" "k8s.io/klog/v2/klogr" "k8s.io/utils/clock" @@ -43,6 +48,7 @@ import ( 
"github.com/cert-manager/csi-lib/driver" "github.com/cert-manager/csi-lib/manager" "github.com/cert-manager/csi-lib/metadata" + "github.com/cert-manager/csi-lib/metrics" "github.com/cert-manager/csi-lib/storage" ) @@ -104,13 +110,23 @@ func main() { store.FSGroupVolumeAttributeKey = FsGroupKey - d, err := driver.New(context.Background(), *endpoint, log, driver.Options{ + cmClient := cmclient.NewForConfigOrDie(restConfig) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + metricsHandler, err := startMetricsServer(ctx, *nodeID, log, cmClient, store) + if err != nil { + panic("failed to setup metrics server: " + err.Error()) + } + + d, err := driver.New(ctx, *endpoint, log, driver.Options{ DriverName: "csi.cert-manager.io", DriverVersion: "v0.0.1", NodeID: *nodeID, Store: store, Manager: manager.NewManagerOrDie(manager.Options{ - Client: cmclient.NewForConfigOrDie(restConfig), + Client: cmClient, MetadataReader: store, Clock: clock.RealClock{}, Log: &log, @@ -119,6 +135,7 @@ func main() { GenerateRequest: generateRequest, SignRequest: signRequest, WriteKeypair: (&writer{store: store}).writeKeypair, + Metrics: metricsHandler, }), }) if err != nil { @@ -351,3 +368,62 @@ func keyUsagesFromAttributes(usagesCSV string) []cmapi.KeyUsage { return keyUsages } + +// startMetricsServer starts a server listening on port 6443, until the supplied context is cancelled, +// after which the server will gracefully shutdown (within 5 seconds). 
+func startMetricsServer( + rootCtx context.Context, + nodeId string, + logger logr.Logger, + cmClient *cmclient.Clientset, + metadataReader storage.MetadataReader, +) (*metrics.Metrics, error) { + g, ctx := errgroup.WithContext(rootCtx) + defer func() { + if err := g.Wait(); err != nil { + logger.Error(err, "fail to stop metric server") + } + }() + + metricsHandler := metrics.New(&logger, prometheus.NewRegistry()) + + certRequestInformerFactory := externalversions.NewSharedInformerFactory(cmClient, 5*time.Second) + certRequestInformer := certRequestInformerFactory.Certmanager().V1().CertificateRequests() + metricsHandler.SetupCertificateRequestCollector(nodeId, metadataReader, certRequestInformer.Lister()) + + listenConfig := &net.ListenConfig{} + metricsLn, err := listenConfig.Listen(ctx, "tcp", "127.0.0.1:6443") + if err != nil { + return nil, err + } + metricsServer := &http.Server{ + Addr: metricsLn.Addr().String(), + ReadTimeout: 8 * time.Second, + WriteTimeout: 8 * time.Second, + MaxHeaderBytes: 1 << 20, // 1 MiB + Handler: metricsHandler.DefaultHandler(), + } + + g.Go(func() error { + certRequestInformerFactory.Start(ctx.Done()) + certRequestInformerFactory.WaitForCacheSync(ctx.Done()) + return nil + }) + g.Go(func() error { + <-ctx.Done() + // ctx ends when rootCtx is cancelled or another goroutine in the group fails; allow a timeout for graceful shutdown + shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + // nolint: contextcheck + return metricsServer.Shutdown(shutdownCtx) + }) + g.Go(func() error { + logger.Info("starting metrics server", "address", metricsLn.Addr()) + if err := metricsServer.Serve(metricsLn); err != http.ErrServerClosed { + return err + } + return nil + }) + return metricsHandler, nil +} diff --git a/metrics/certificaterequest_test.go b/metrics/certificaterequest_test.go index be51e04..7cb77c5 100644 --- a/metrics/certificaterequest_test.go +++ b/metrics/certificaterequest_test.go @@ -228,7 +228,7 @@ func TestCertificateRequestMetrics(t *testing.T) { 
assert.NoError(t, err) fakeMetadata := storage.NewMemoryFS() fakeMetadata.RegisterMetadata(test.meta) - m.SetupCertificateRequestCollector(internalapiutil.HashIdentifier(testNodeName), fakeMetadata, certRequestInformer.Lister()) + m.SetupCertificateRequestCollector(testNodeName, fakeMetadata, certRequestInformer.Lister()) if err := testutil.CollectAndCompare(m.certificateRequestCollector, strings.NewReader(expiryMetadata+test.expectedExpiry), @@ -355,7 +355,7 @@ func TestCertificateRequestCache(t *testing.T) { testLog := testr.New(t) m := New(&testLog, prometheus.NewRegistry()) - m.SetupCertificateRequestCollector(testNodeNameHash, fakeMetadata, certRequestInformer.Lister()) + m.SetupCertificateRequestCollector(testNodeName, fakeMetadata, certRequestInformer.Lister()) // Check all three metrics exist if err := testutil.CollectAndCompare(m.certificateRequestCollector, diff --git a/metrics/metrics.go b/metrics/metrics.go index d14c808..b0985f3 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -24,6 +24,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" + internalapiutil "github.com/cert-manager/csi-lib/internal/api/util" "github.com/cert-manager/csi-lib/storage" ) @@ -95,8 +96,8 @@ func (m *Metrics) DefaultHandler() http.Handler { return mux } -func (m *Metrics) SetupCertificateRequestCollector(nodeNameHash string, metadataReader storage.MetadataReader, certificateRequestLister cmlisters.CertificateRequestLister) { - m.certificateRequestCollector = NewCertificateRequestCollector(nodeNameHash, metadataReader, certificateRequestLister) +func (m *Metrics) SetupCertificateRequestCollector(nodeId string, metadataReader storage.MetadataReader, certificateRequestLister cmlisters.CertificateRequestLister) { + m.certificateRequestCollector = NewCertificateRequestCollector(internalapiutil.HashIdentifier(nodeId), metadataReader, certificateRequestLister) 
m.registry.MustRegister(m.certificateRequestCollector) } diff --git a/test/integration/metrics_test.go b/test/integration/metrics_test.go index be18f9f..f032ced 100644 --- a/test/integration/metrics_test.go +++ b/test/integration/metrics_test.go @@ -41,7 +41,6 @@ import ( "k8s.io/apimachinery/pkg/util/wait" fakeclock "k8s.io/utils/clock/testing" - internalapiutil "github.com/cert-manager/csi-lib/internal/api/util" "github.com/cert-manager/csi-lib/manager" "github.com/cert-manager/csi-lib/metadata" "github.com/cert-manager/csi-lib/metrics" @@ -108,7 +107,7 @@ func TestMetricsServer(t *testing.T) { // https://github.com/kubernetes/client-go/blob/5a019202120ab4dd7dfb3788e5cb87269f343ebe/tools/cache/shared_informer.go#L575 factory := externalversions.NewSharedInformerFactory(fakeClient, time.Second) certRequestInformer := factory.Certmanager().V1().CertificateRequests() - metricsHandler.SetupCertificateRequestCollector(internalapiutil.HashIdentifier(testNodeId), store, certRequestInformer.Lister()) + metricsHandler.SetupCertificateRequestCollector(testNodeId, store, certRequestInformer.Lister()) factory.Start(ctx.Done()) factory.WaitForCacheSync(ctx.Done()) From b5a8ccd7df02fd36881ee468fef7eccf6dca2d57 Mon Sep 17 00:00:00 2001 From: Jing Liu Date: Wed, 17 Sep 2025 15:56:14 -0700 Subject: [PATCH 7/7] Consolidate metrics new function Signed-off-by: Jing Liu --- .../simple/deploy/01_simple-csi-driver.yaml | 6 ++- examples/simple/deploy/02_example-app.yaml | 2 +- examples/simple/main.go | 40 ++++++++----------- metrics/certificaterequest_test.go | 6 +-- metrics/metrics.go | 24 ++++++++--- test/integration/metrics_test.go | 3 +- 6 files changed, 44 insertions(+), 37 deletions(-) diff --git a/examples/simple/deploy/01_simple-csi-driver.yaml b/examples/simple/deploy/01_simple-csi-driver.yaml index 4fca9b8..4b35006 100644 --- a/examples/simple/deploy/01_simple-csi-driver.yaml +++ b/examples/simple/deploy/01_simple-csi-driver.yaml @@ -59,7 +59,7 @@ spec: 
allowPrivilegeEscalation: false capabilities: { drop: [ "ALL" ] } readOnlyRootFilesystem: true - image: registry.k8s.io/sig-storage/csi-node-driver-registrar:v2.13.0 + image: registry.k8s.io/sig-storage/csi-node-driver-registrar:v2.14.0 args: - --v=5 - --csi-address=/plugin/csi.sock @@ -94,6 +94,10 @@ spec: fieldPath: spec.nodeName - name: CSI_ENDPOINT value: unix://plugin/csi.sock + ports: + - containerPort: 9402 + name: http-metrics + protocol: TCP volumeMounts: - name: plugin-dir mountPath: /plugin diff --git a/examples/simple/deploy/02_example-app.yaml b/examples/simple/deploy/02_example-app.yaml index a63ea12..fdcd203 100644 --- a/examples/simple/deploy/02_example-app.yaml +++ b/examples/simple/deploy/02_example-app.yaml @@ -48,7 +48,7 @@ spec: runAsUser: 2000 containers: - name: my-frontend - image: busybox:1.35.0 + image: busybox:1.36.1 volumeMounts: - mountPath: "/tls" name: tls diff --git a/examples/simple/main.go b/examples/simple/main.go index ceb1a68..9c753d3 100644 --- a/examples/simple/main.go +++ b/examples/simple/main.go @@ -115,10 +115,16 @@ func main() { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - metricsHandler, err := startMetricsServer(ctx, *nodeID, log, cmClient, store) - if err != nil { - panic("failed to setup metrics server: " + err.Error()) - } + certRequestInformerFactory := externalversions.NewSharedInformerFactory(cmClient, 5*time.Second) + certRequestInformer := certRequestInformerFactory.Certmanager().V1().CertificateRequests() + metricsHandler := metrics.New(*nodeID, &log, prometheus.NewRegistry(), store, certRequestInformer.Lister()) + + go func() { + err := startMetricsServer(ctx, log, metricsHandler, certRequestInformerFactory) + if err != nil { + panic("failed to setup metrics server: " + err.Error()) + } + }() d, err := driver.New(ctx, *endpoint, log, driver.Options{ DriverName: "csi.cert-manager.io", @@ -369,32 +375,20 @@ func keyUsagesFromAttributes(usagesCSV string) []cmapi.KeyUsage { return 
keyUsages } -// startMetricsServer starts a server listening on port 6443, until the supplied context is cancelled, +// startMetricsServer starts a server listening on port 9402, until the supplied context is cancelled, // after which the server will gracefully shutdown (within 5 seconds). func startMetricsServer( rootCtx context.Context, - nodeId string, logger logr.Logger, - cmClient *cmclient.Clientset, - metadataReader storage.MetadataReader, -) (*metrics.Metrics, error) { + metricsHandler *metrics.Metrics, + certRequestInformerFactory externalversions.SharedInformerFactory, +) error { g, ctx := errgroup.WithContext(rootCtx) - defer func() { - if err := g.Wait(); err != nil { - logger.Error(err, "fail to stop metric server") - } - }() - - metricsHandler := metrics.New(&logger, prometheus.NewRegistry()) - - certRequestInformerFactory := externalversions.NewSharedInformerFactory(cmClient, 5*time.Second) - certRequestInformer := certRequestInformerFactory.Certmanager().V1().CertificateRequests() - metricsHandler.SetupCertificateRequestCollector(nodeId, metadataReader, certRequestInformer.Lister()) listenConfig := &net.ListenConfig{} - metricsLn, err := listenConfig.Listen(ctx, "tcp", "127.0.0.1:6443") + metricsLn, err := listenConfig.Listen(ctx, "tcp", ":9402") if err != nil { - return nil, err + return err } metricsServer := &http.Server{ Addr: metricsLn.Addr().String(), @@ -425,5 +419,5 @@ func startMetricsServer( } return nil }) - return metricsHandler, nil + return g.Wait() } diff --git a/metrics/certificaterequest_test.go b/metrics/certificaterequest_test.go index 7cb77c5..de38e27 100644 --- a/metrics/certificaterequest_test.go +++ b/metrics/certificaterequest_test.go @@ -215,7 +215,6 @@ func TestCertificateRequestMetrics(t *testing.T) { for n, test := range tests { t.Run(n, func(t *testing.T) { testLog := testr.New(t) - m := New(&testLog, prometheus.NewRegistry()) fakeClient := fake.NewSimpleClientset() factory := 
externalversions.NewSharedInformerFactory(fakeClient, 0) @@ -228,7 +227,7 @@ func TestCertificateRequestMetrics(t *testing.T) { assert.NoError(t, err) fakeMetadata := storage.NewMemoryFS() fakeMetadata.RegisterMetadata(test.meta) - m.SetupCertificateRequestCollector(testNodeName, fakeMetadata, certRequestInformer.Lister()) + m := New(testNodeName, &testLog, prometheus.NewRegistry(), fakeMetadata, certRequestInformer.Lister()) if err := testutil.CollectAndCompare(m.certificateRequestCollector, strings.NewReader(expiryMetadata+test.expectedExpiry), @@ -354,8 +353,7 @@ func TestCertificateRequestCache(t *testing.T) { assert.NoError(t, err) testLog := testr.New(t) - m := New(&testLog, prometheus.NewRegistry()) - m.SetupCertificateRequestCollector(testNodeName, fakeMetadata, certRequestInformer.Lister()) + m := New(testNodeName, &testLog, prometheus.NewRegistry(), fakeMetadata, certRequestInformer.Lister()) // Check all three metrics exist if err := testutil.CollectAndCompare(m.certificateRequestCollector, diff --git a/metrics/metrics.go b/metrics/metrics.go index b0985f3..983b48f 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -45,7 +45,13 @@ type Metrics struct { } // New creates a Metrics struct and populates it with prometheus metric types. -func New(logger *logr.Logger, registry *prometheus.Registry) *Metrics { +func New( + nodeId string, + logger *logr.Logger, + registry *prometheus.Registry, + metadataReader storage.MetadataReader, + certificateRequestLister cmlisters.CertificateRequestLister, +) *Metrics { var ( // driverIssueCallCountTotal is a Prometheus counter for the number of issue() calls made by the driver. 
driverIssueCallCountTotal = prometheus.NewCounterVec( @@ -71,11 +77,6 @@ func New(logger *logr.Logger, registry *prometheus.Registry) *Metrics { ) ) - registry.MustRegister( - driverIssueCallCountTotal, - driverIssueErrorCountTotal, - ) - // Create server and register Prometheus metrics handler m := &Metrics{ log: logger.WithName("metrics"), @@ -83,8 +84,19 @@ func New(logger *logr.Logger, registry *prometheus.Registry) *Metrics { driverIssueCallCountTotal: driverIssueCallCountTotal, driverIssueErrorCountTotal: driverIssueErrorCountTotal, + certificateRequestCollector: NewCertificateRequestCollector( + internalapiutil.HashIdentifier(nodeId), + metadataReader, + certificateRequestLister, + ), } + m.registry.MustRegister( + driverIssueCallCountTotal, + driverIssueErrorCountTotal, + m.certificateRequestCollector, + ) + return m } diff --git a/test/integration/metrics_test.go b/test/integration/metrics_test.go index f032ced..f99209e 100644 --- a/test/integration/metrics_test.go +++ b/test/integration/metrics_test.go @@ -100,14 +100,13 @@ func TestMetricsServer(t *testing.T) { testNodeId := "test-node" // Build metrics handler, and start metrics server with a random available port - metricsHandler := metrics.New(&testLog, prometheus.NewRegistry()) store := storage.NewMemoryFS() fakeClient := fake.NewSimpleClientset() // client-go imposes a minimum resync period of 1 second, so that is the lowest we can go // https://github.com/kubernetes/client-go/blob/5a019202120ab4dd7dfb3788e5cb87269f343ebe/tools/cache/shared_informer.go#L575 factory := externalversions.NewSharedInformerFactory(fakeClient, time.Second) certRequestInformer := factory.Certmanager().V1().CertificateRequests() - metricsHandler.SetupCertificateRequestCollector(testNodeId, store, certRequestInformer.Lister()) + metricsHandler := metrics.New(testNodeId, &testLog, prometheus.NewRegistry(), store, certRequestInformer.Lister()) factory.Start(ctx.Done()) factory.WaitForCacheSync(ctx.Done())