10 changes: 8 additions & 2 deletions api/observability/v1/conditions.go
@@ -17,7 +17,6 @@ package v1
import metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

const (

// ConditionTrue means the condition is met
ConditionTrue = metav1.ConditionTrue

@@ -30,8 +29,12 @@ const (
// ConditionTypeAuthorized identifies the state of authorization for the service
ConditionTypeAuthorized = GroupName + "/Authorized"

// ConditionTypeLogLevel validates the value of the log-level annotation
ConditionTypeLogLevel = GroupName + "/LogLevel"

// ConditionTypeMaxUnavailable validates the value of the max-unavailable-rollout annotation
ConditionTypeMaxUnavailable = GroupName + "/MaxUnavailableAnnotation"

// ConditionTypeReady indicates the service is ready.
//
// Ready=True means the operands are running and providing some service.
@@ -77,9 +80,12 @@ const (
// ReasonMissingSpec applies when a type is specified without a defined spec (e.g. type application without obs.Application)
ReasonMissingSpec = "MissingSpec"

// ReasonLogLevelSupported indicates the support for the log level annotation value
// ReasonLogLevelSupported indicates the support for the log-level annotation value
ReasonLogLevelSupported = "LogLevelSupported"

// ReasonMaxUnavailableSupported indicates the support for the max-unavailable-rollout annotation value
ReasonMaxUnavailableSupported = "MaxUnavailableAnnotationSupported"

// ReasonReconciliationComplete when the operator has initialized, validated, and deployed the resources for the workload
ReasonReconciliationComplete = "ReconciliationComplete"

2 changes: 1 addition & 1 deletion api/observability/v1/output_types.go
@@ -744,7 +744,7 @@ type Kafka struct {
Brokers []BrokerURL `json:"brokers,omitempty"`
}

// +kubebuilder:validation:XValidation:rule="self == '' || (isURL(self) && (self.startsWith('tcp://') || self.startsWith('tls://')))",message="each broker must be a valid URL with a tcp or tls scheme"
// +kubebuilder:validation:XValidation:rule="isURL(self) && (self.startsWith('tcp://') || self.startsWith('tls://'))",message="each broker must be a valid URL with a tcp or tls scheme"
type BrokerURL string

type LokiTuningSpec struct {
@@ -6,6 +6,21 @@ spec:
groups:
- name: logging_collector.alerts
rules:
- alert: ClusterLogForwarderDeprecations
annotations:
message: The Cluster Logging Operator version {{$labels.version}} includes
deprecations to some features of ClusterLogForwarder.
summary: |-
The Cluster Logging Operator version {{$labels.version}} includes deprecations to some features of ClusterLogForwarder which
will be removed in a future release. Please see the release notes for details:
https://docs.redhat.com/en/documentation/red_hat_openshift_logging/6.2/html/release_notes
expr: |
max by (version) (csv_succeeded{exported_namespace="openshift-logging", name=~"cluster-logging.*", version=~"6.2.*"}) > 0
for: 1m
labels:
namespace: openshift-logging
service: collector
severity: info
- alert: CollectorNodeDown
annotations:
description: Prometheus could not scrape {{ $labels.namespace }}/{{ $labels.pod
@@ -1627,8 +1627,7 @@ spec:
x-kubernetes-validations:
- message: each broker must be a valid URL with a tcp
or tls scheme
rule: self == '' || (isURL(self) && (self.startsWith('tcp://')
|| self.startsWith('tls://')))
rule: isURL(self) && (self.startsWith('tcp://') || self.startsWith('tls://'))
type: array
topic:
description: |-
@@ -1627,8 +1627,7 @@ spec:
x-kubernetes-validations:
- message: each broker must be a valid URL with a tcp
or tls scheme
rule: self == '' || (isURL(self) && (self.startsWith('tcp://')
|| self.startsWith('tls://')))
rule: isURL(self) && (self.startsWith('tcp://') || self.startsWith('tls://'))
type: array
topic:
description: |-
14 changes: 14 additions & 0 deletions config/prometheus/collector_alerts.yaml
@@ -7,6 +7,20 @@ spec:
groups:
- name: logging_collector.alerts
rules:
- alert: ClusterLogForwarderDeprecations
annotations:
message: "The Cluster Logging Operator version {{$labels.version}} includes deprecations to some feature of ClusterLogForwarder."
summary: |-
The Cluster Logging Operator version {{$labels.version}} includes deprecations to some features of ClusterLogForwarder which
will be removed in a future release. Please see the release notes for details:
https://docs.redhat.com/en/documentation/red_hat_openshift_logging/6.2/html/release_notes
expr: |
max by (version) (csv_succeeded{exported_namespace="openshift-logging", name=~"cluster-logging.*", version=~"6.2.*"}) > 0
for: 1m
labels:
namespace: openshift-logging
service: collector
severity: info
- alert: CollectorNodeDown
annotations:
description: "Prometheus could not scrape {{ $labels.namespace }}/{{ $labels.pod }} collector component for more than 10m."
103 changes: 103 additions & 0 deletions docs/features/kube-api-annotations.adoc
@@ -0,0 +1,103 @@
== Reducing memory pressure on the Kubernetes API server
Steps to introduce rolling update configuration for the logging collector pods in large-scale clusters

IMPORTANT: Enabling this feature through an annotation is deprecated and will be https://issues.redhat.com/browse/LOG-7587[replaced] in a future release by
directly editing the ClusterLogForwarder spec.

=== Description
This feature adds the `use_apiserver_cache` setting to the generated vector.toml, as well as a configurable rolling-update
`maxUnavailable` value for the forwarder's DaemonSet. The `maxUnavailable` behavior is enabled through an annotation.
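
A hedged sketch of how the cache setting might appear in the generated vector.toml (the source name and layout are illustrative and may differ from the operator's actual output):

.example vector.toml excerpt
[source,toml]
----
# Kubernetes logs source with the API server cache enabled
[sources.input_application_container]
type = "kubernetes_logs"
use_apiserver_cache = true
----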

==== Configuration
* Update your ClusterLogForwarder instance and include the following `metadata.annotations`:
+
[source,yaml]
----
observability.openshift.io/max-unavailable-rollout: <percentage_or_number>
----
+
.example forwarder
[source,yaml]
----
apiVersion: observability.openshift.io/v1
kind: ClusterLogForwarder
metadata:
annotations:
observability.openshift.io/max-unavailable-rollout: "20%"
name: my-forwarder
namespace: my-logging-namespace
spec:
...
----
+
NOTE: `max-unavailable-rollout` can be an absolute number (e.g., 1) or a percentage (e.g., 10%). The default is 100%.
+
If you need guidance on updating your forwarder instance, see the sections below.

==== Verifying
* Use the following command to verify that the rolling-update setting was applied to the forwarder DaemonSet:
+
.forwarder daemonset
[source,bash]
----
oc get ds <my-forwarder-name> -ojson | jq '.spec.updateStrategy'
----
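+
A hedged sketch of the expected output when the annotation is set to `20%` (values are illustrative; the API server may add defaults such as `maxSurge`):
+
.example output
[source,json]
----
{
  "rollingUpdate": {
    "maxUnavailable": "20%"
  },
  "type": "RollingUpdate"
}
----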


===== Conditions
* Verify there are no `False` conditions in the forwarder's validation status:
+
.forwarder status
[source,bash]
----
oc get obsclf <my-forwarder-name> -ojson | jq '.status.conditions'
----
+
.invalid examples
[source,json]
----
{
"message": "max-unavailable-rollout value \"200%\" must be an absolute number or a valid percentage",
"reason": "MaxUnavailableAnnotationSupported",
"status": "False",
"type": "observability.openshift.io/MaxUnavailableAnnotation"
}

----
+
NOTE: Conditions for annotations appear only when a value is invalid, with the status set to `False`. If there are no entries that mention
annotations, they were either not found or are valid.

==== Other Commands
====
* You can add an annotation using `oc patch` on the ClusterLogForwarder instance:
+
.example command
[source,bash]
----
oc patch obsclf <my-forwarder-name> --type='merge' -p '{"metadata":{"annotations":{"observability.openshift.io/max-unavailable-rollout":"20%"}}}'
----
* Alternatively, you can pull down the forwarder instance and make your changes locally:
+
[source,bash]
----
oc get obsclf <my-forwarder-name> -o yaml > my-forwarder.yaml
----
+
Then apply the local file:
+
[source,bash]
----
oc apply -f my-forwarder.yaml
----
* You could also use `oc edit` directly on the instance:
+
[source,bash]
----
oc edit obsclf <my-forwarder-name>
----
====

==== References
* Annotation Implemented: https://issues.redhat.com/browse/LOG-7196
* Knowledgebase Article: https://access.redhat.com/solutions/7121949
* Upstream Fix: https://github.com/vectordotdev/vector/pull/17095/files
7 changes: 5 additions & 2 deletions internal/collector/collector.go
@@ -67,6 +67,8 @@ type Factory struct {
ResourceNames *factory.ForwarderResourceNames
isDaemonset bool
LogLevel string
UseKubeCache bool
MaxUnavailable string
}

// CollectorResourceRequirements returns the resource requirements for a given collector implementation
Expand All @@ -85,7 +87,7 @@ func (f *Factory) Tolerations() []v1.Toleration {
return f.CollectorSpec.Tolerations
}

func New(confHash, clusterID string, collectorSpec *obs.CollectorSpec, secrets internalobs.Secrets, configMaps map[string]*v1.ConfigMap, forwarderSpec obs.ClusterLogForwarderSpec, resNames *factory.ForwarderResourceNames, isDaemonset bool, logLevel string) *Factory {
func New(confHash, clusterID string, collectorSpec *obs.CollectorSpec, secrets internalobs.Secrets, configMaps map[string]*v1.ConfigMap, forwarderSpec obs.ClusterLogForwarderSpec, resNames *factory.ForwarderResourceNames, isDaemonset bool, logLevel string, maxUnavailable string) *Factory {
if collectorSpec == nil {
collectorSpec = &obs.CollectorSpec{}
}
@@ -105,13 +107,14 @@ func New(confHash, clusterID string, collectorSpec *obs.CollectorSpec, secrets i
PodLabelVisitor: vector.PodLogExcludeLabel,
isDaemonset: isDaemonset,
LogLevel: logLevel,
MaxUnavailable: maxUnavailable,
}
return factory
}

func (f *Factory) NewDaemonSet(namespace, name string, trustedCABundle *v1.ConfigMap, tlsProfileSpec configv1.TLSProfileSpec) *apps.DaemonSet {
podSpec := f.NewPodSpec(trustedCABundle, f.ForwarderSpec, f.ClusterID, tlsProfileSpec, namespace)
ds := factory.NewDaemonSet(namespace, name, name, constants.CollectorName, constants.VectorName, *podSpec, f.CommonLabelInitializer, f.PodLabelVisitor)
ds := factory.NewDaemonSet(namespace, name, name, constants.CollectorName, constants.VectorName, f.MaxUnavailable, *podSpec, f.CommonLabelInitializer, f.PodLabelVisitor)
ds.Spec.Template.Annotations[constants.AnnotationSecretHash] = f.Secrets.Hash64a()
return ds
}
4 changes: 4 additions & 0 deletions internal/constants/annotations.go
@@ -17,4 +17,8 @@ const (
AnnotationOtlpOutputTechPreview = "observability.openshift.io/tech-preview-otlp-output"

AnnotationSecretHash = "observability.openshift.io/secret-hash"

// AnnotationMaxUnavailable (Deprecated) configures the maximum number of DaemonSet pods that can be unavailable during a rolling update.
// This can be an absolute number (e.g., 1) or a percentage (e.g., 10%). Default is 100%.
AnnotationMaxUnavailable = "observability.openshift.io/max-unavailable-rollout"
)
46 changes: 38 additions & 8 deletions internal/controller/observability/collector.go
@@ -21,6 +21,7 @@ import (
"github.com/openshift/cluster-logging-operator/internal/runtime/serviceaccount"
"github.com/openshift/cluster-logging-operator/internal/tls"
"github.com/openshift/cluster-logging-operator/internal/utils"
"github.com/openshift/cluster-logging-operator/internal/validations/observability"
corev1 "k8s.io/api/core/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
)
@@ -42,6 +43,9 @@ func ReconcileCollector(context internalcontext.ForwarderContext, pollInterval,
options = context.AdditionalContext
}

// Set rollout options based on annotation (LOG-7196)
SetMaxUnavailableRolloutOption(context.Forwarder.Annotations, options)

if internalobs.Outputs(context.Forwarder.Spec.Outputs).NeedServiceAccountToken() {
// temporarily create SA token until collector is capable of dynamically reloading a projected serviceaccount token
var sa *corev1.ServiceAccount
@@ -88,28 +92,40 @@ func ReconcileCollector(context internalcontext.ForwarderContext, pollInterval,

isDaemonSet := !internalobs.DeployAsDeployment(*context.Forwarder)
log.V(3).Info("Deploying as DaemonSet", "isDaemonSet", isDaemonSet)
factory := collector.New(collectorConfHash, context.ClusterID, context.Forwarder.Spec.Collector, context.Secrets, context.ConfigMaps, context.Forwarder.Spec, resourceNames, isDaemonSet, LogLevel(context.Forwarder.Annotations))
if err = factory.ReconcileCollectorConfig(context.Client, context.Reader, context.Forwarder.Namespace, collectorConfig, ownerRef); err != nil {
collectorFactory := collector.New(
collectorConfHash,
context.ClusterID,
context.Forwarder.Spec.Collector,
context.Secrets, context.ConfigMaps,
context.Forwarder.Spec,
resourceNames,
isDaemonSet,
LogLevel(context.Forwarder.Annotations),
factory.GetMaxUnavailableValue(options),
)

if err = collectorFactory.ReconcileCollectorConfig(context.Client, context.Reader, context.Forwarder.Namespace, collectorConfig, ownerRef); err != nil {
log.Error(err, "collector.ReconcileCollectorConfig")
return
}

reconcileWorkload := factory.ReconcileDaemonset
reconcileWorkload := collectorFactory.ReconcileDaemonset
if !isDaemonSet {
reconcileWorkload = factory.ReconcileDeployment
reconcileWorkload = collectorFactory.ReconcileDeployment
}

if err := reconcileWorkload(context.Client, context.Forwarder.Namespace, trustedCABundle, ownerRef); err != nil {
log.Error(err, "Error reconciling the deployment of the collector")
return err
}

if err := factory.ReconcileInputServices(context.Client, context.Reader, context.Forwarder.Namespace, ownerRef, factory.CommonLabelInitializer); err != nil {
if err := collectorFactory.ReconcileInputServices(context.Client, context.Reader, context.Forwarder.Namespace, ownerRef, collectorFactory.CommonLabelInitializer); err != nil {
log.Error(err, "collector.ReconcileInputServices")
return err
}

// Reconcile resources to support metrics gathering
if err := network.ReconcileService(context.Client, context.Forwarder.Namespace, resourceNames.CommonName, context.Forwarder.Name, constants.CollectorName, collector.MetricsPortName, resourceNames.SecretMetrics, collector.MetricsPort, ownerRef, factory.CommonLabelInitializer); err != nil {
if err := network.ReconcileService(context.Client, context.Forwarder.Namespace, resourceNames.CommonName, context.Forwarder.Name, constants.CollectorName, collector.MetricsPortName, resourceNames.SecretMetrics, collector.MetricsPort, ownerRef, collectorFactory.CommonLabelInitializer); err != nil {
log.Error(err, "collector.ReconcileService")
return err
}
@@ -122,12 +138,12 @@ func ReconcileCollector(context internalcontext.ForwarderContext, pollInterval,
return nil
}

func GenerateConfig(k8Client client.Client, spec obs.ClusterLogForwarder, resourceNames factory.ForwarderResourceNames, secrets internalobs.Secrets, op framework.Options) (config string, err error) {
func GenerateConfig(k8Client client.Client, clf obs.ClusterLogForwarder, resourceNames factory.ForwarderResourceNames, secrets internalobs.Secrets, op framework.Options) (config string, err error) {
tlsProfile, _ := tls.FetchAPIServerTlsProfile(k8Client)
op[framework.ClusterTLSProfileSpec] = tls.GetClusterTLSProfileSpec(tlsProfile)
//EvaluateAnnotationsForEnabledCapabilities(clusterRequest.Forwarder, op)
g := forwardergenerator.New()
generatedConfig, err := g.GenerateConf(secrets, spec.Spec, spec.Namespace, spec.Name, resourceNames, op)
generatedConfig, err := g.GenerateConf(secrets, clf.Spec, clf.Namespace, clf.Name, resourceNames, op)

if err != nil {
log.Error(err, "Unable to generate log configuration")
@@ -149,6 +165,11 @@ func EvaluateAnnotationsForEnabledCapabilities(annotations map[string]string, op
if strings.ToLower(value) == "true" {
options[generatorhelpers.EnableDebugOutput] = "true"
}
case constants.AnnotationMaxUnavailable:
// Matching the validate_annotations logic
if observability.IsPercentOrWholeNumber(value) {
options[framework.MaxUnavailableOption] = value
}
}
}
}
@@ -159,3 +180,12 @@ func LogLevel(annotations map[string]string) string {
}
return "warn"
}

func SetMaxUnavailableRolloutOption(annotations map[string]string, options framework.Options) {
if value, found := annotations[constants.AnnotationMaxUnavailable]; found {
if observability.IsPercentOrWholeNumber(value) {
log.V(3).Info("Max Unavailable annotation found")
options[framework.MaxUnavailableOption] = value
}
}
}
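
For context, a minimal hedged sketch of the `IsPercentOrWholeNumber` helper referenced above (the actual implementation lives in internal/validations/observability and may differ, e.g. by reusing Kubernetes intstr/validation utilities):

package observability

import (
	"strconv"
	"strings"
)

// IsPercentOrWholeNumber reports whether value is a non-negative whole number
// (e.g. "1") or a whole-number percentage no greater than 100 (e.g. "10%").
func IsPercentOrWholeNumber(value string) bool {
	n, err := strconv.Atoi(strings.TrimSuffix(value, "%"))
	if err != nil || n < 0 {
		return false
	}
	return !strings.HasSuffix(value, "%") || n <= 100
}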
4 changes: 4 additions & 0 deletions internal/controller/observability/collector_features_test.go
@@ -39,6 +39,10 @@ var _ = Describe("#EvaluateAnnotationsForEnabledCapabilities", func() {
Entry("enables debug for true", helpers.EnableDebugOutput, "true", AnnotationDebugOutput, "true"),
Entry("enables debug for True", helpers.EnableDebugOutput, "true", AnnotationDebugOutput, "True"),
Entry("disables debug for anything else", "", "", AnnotationDebugOutput, "abcdef"),

Entry("enables max-unavailable for value '10'", framework.MaxUnavailableOption, "10", AnnotationMaxUnavailable, "10"),
Entry("enables max-unavailable for value '99%'", framework.MaxUnavailableOption, "99%", AnnotationMaxUnavailable, "99%"),
Entry("disables max-unavailable option for anything not a number or percentage", "", "", AnnotationMaxUnavailable, "fluffy"),
)

})