10 changes: 8 additions & 2 deletions api/observability/v1/conditions.go
@@ -17,7 +17,6 @@ package v1
import metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

const (

// ConditionTrue means the condition is met
ConditionTrue = metav1.ConditionTrue

@@ -30,8 +29,12 @@ const (
// ConditionTypeAuthorized identifies the state of authorization for the service
ConditionTypeAuthorized = GroupName + "/Authorized"

// ConditionTypeLogLevel validates the value of the log-level annotation
ConditionTypeLogLevel = GroupName + "/LogLevel"

// ConditionTypeMaxUnavailable validates the value of the max-unavailable-rollout annotation
ConditionTypeMaxUnavailable = GroupName + "/MaxUnavailableAnnotation"

// ConditionTypeReady indicates the service is ready.
//
// Ready=True means the operands are running and providing some service.
@@ -77,9 +80,12 @@ const (
// ReasonMissingSpec applies when a type is specified without a defined spec (e.g. type application without obs.Application)
ReasonMissingSpec = "MissingSpec"

// ReasonLogLevelSupported indicates the support for the log level annotation value
// ReasonLogLevelSupported indicates the support for the log-level annotation value
ReasonLogLevelSupported = "LogLevelSupported"

// ReasonMaxUnavailableSupported indicates the support for the max-unavailable-rollout annotation value
ReasonMaxUnavailableSupported = "MaxUnavailableAnnotationSupported"

// ReasonReconciliationComplete when the operator has initialized, validated, and deployed the resources for the workload
ReasonReconciliationComplete = "ReconciliationComplete"

2 changes: 1 addition & 1 deletion api/observability/v1/output_types.go
@@ -744,7 +744,7 @@ type Kafka struct {
Brokers []BrokerURL `json:"brokers,omitempty"`
}

// +kubebuilder:validation:XValidation:rule="self == '' || (isURL(self) && (self.startsWith('tcp://') || self.startsWith('tls://')))",message="each broker must be a valid URL with a tcp or tls scheme"
// +kubebuilder:validation:XValidation:rule="isURL(self) && (self.startsWith('tcp://') || self.startsWith('tls://'))",message="each broker must be a valid URL with a tcp or tls scheme"
type BrokerURL string

type LokiTuningSpec struct {
@@ -6,6 +6,21 @@ spec:
groups:
- name: logging_collector.alerts
rules:
- alert: ClusterLogForwarderDeprecations
annotations:
message: The Cluster Logging Operator version {{$labels.version}} includes
deprecations to some features of ClusterLogForwarder.
summary: |-
The Cluster Logging Operator version {{$labels.version}} includes deprecations to some features of ClusterLogForwarder which
will be removed in a future release. Please see the release notes for details:
https://docs.redhat.com/en/documentation/red_hat_openshift_logging/6.2/html/release_notes
expr: |
max by (version) (csv_succeeded{exported_namespace="openshift-logging", name=~"cluster-logging.*", version=~"6.2.*"}) > 0
for: 1m
labels:
namespace: openshift-logging
service: collector
severity: info
- alert: CollectorNodeDown
annotations:
description: Prometheus could not scrape {{ $labels.namespace }}/{{ $labels.pod
@@ -1627,8 +1627,7 @@ spec:
x-kubernetes-validations:
- message: each broker must be a valid URL with a tcp
or tls scheme
rule: self == '' || (isURL(self) && (self.startsWith('tcp://')
|| self.startsWith('tls://')))
rule: isURL(self) && (self.startsWith('tcp://') || self.startsWith('tls://'))
type: array
topic:
description: |-
@@ -1627,8 +1627,7 @@ spec:
x-kubernetes-validations:
- message: each broker must be a valid URL with a tcp
or tls scheme
rule: self == '' || (isURL(self) && (self.startsWith('tcp://')
|| self.startsWith('tls://')))
rule: isURL(self) && (self.startsWith('tcp://') || self.startsWith('tls://'))
type: array
topic:
description: |-
14 changes: 14 additions & 0 deletions config/prometheus/collector_alerts.yaml
@@ -7,6 +7,20 @@ spec:
groups:
- name: logging_collector.alerts
rules:
- alert: ClusterLogForwarderDeprecations
annotations:
message: "The Cluster Logging Operator version {{$labels.version}} includes deprecations to some feature of ClusterLogForwarder."
summary: |-
The Cluster Logging Operator version {{$labels.version}} includes deprecations to some features of ClusterLogForwarder which
will be removed in a future release. Please see the release notes for details:
https://docs.redhat.com/en/documentation/red_hat_openshift_logging/6.2/html/release_notes
expr: |
max by (version) (csv_succeeded{exported_namespace="openshift-logging", name=~"cluster-logging.*", version=~"6.2.*"}) > 0
for: 1m
labels:
namespace: openshift-logging
service: collector
severity: info
- alert: CollectorNodeDown
annotations:
description: "Prometheus could not scrape {{ $labels.namespace }}/{{ $labels.pod }} collector component for more than 10m."
103 changes: 103 additions & 0 deletions docs/features/kube-api-annotations.adoc
@@ -0,0 +1,103 @@
== Reducing memory pressure on the Kubernetes API server
Steps to introduce rolling update configuration for the logging collector pods in large-scale clusters

IMPORTANT: Enabling this feature through an annotation is deprecated and will be https://issues.redhat.com/browse/LOG-7587[replaced] in a future release by
directly editing the ClusterLogForwarder spec.

=== Description
This feature adds the `use_apiserver_cache` setting to the generated vector.toml, as well as a configurable rolling-update
`maxUnavailable` value for the forwarder's DaemonSet. The `maxUnavailable` behavior is enabled through an annotation.
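
A hedged sketch of how the cache setting might appear in the generated vector.toml (the source name and layout are illustrative and may differ from the operator's actual output):

.example vector.toml excerpt
[source,toml]
----
# Kubernetes logs source with the API server cache enabled
[sources.input_application_container]
type = "kubernetes_logs"
use_apiserver_cache = true
----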

==== Configuration
* Update your ClusterLogForwarder instance and include the following `metadata.annotations`:
+
[source,yaml]
----
observability.openshift.io/max-unavailable-rollout: <percentage_or_number>
----
+
.example forwarder
[source,yaml]
----
apiVersion: observability.openshift.io/v1
kind: ClusterLogForwarder
metadata:
annotations:
observability.openshift.io/max-unavailable-rollout: "20%"
name: my-forwarder
namespace: my-logging-namespace
spec:
...
----
+
NOTE: `max-unavailable-rollout` can be an absolute number (e.g., 1) or a percentage (e.g., 10%). The default is 100%.
+
If you need guidance on updating your forwarder instance, see the sections below.

==== Verifying
* Use the following command to verify that the rolling-update setting was applied to the forwarder DaemonSet:
+
.forwarder daemonset
[source,bash]
----
oc get ds <my-forwarder-name> -ojson | jq '.spec.updateStrategy'
----
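+
A hedged sketch of the expected output when the annotation is set to `20%` (values are illustrative; the API server may add defaults such as `maxSurge`):
+
.example output
[source,json]
----
{
  "rollingUpdate": {
    "maxUnavailable": "20%"
  },
  "type": "RollingUpdate"
}
----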


===== Conditions
* Verify there are no `False` conditions in the forwarder's validation status:
+
.forwarder status
[source,bash]
----
oc get obsclf <my-forwarder-name> -ojson | jq '.status.conditions'
----
+
.invalid examples
[source,json]
----
{
"message": "max-unavailable-rollout value \"200%\" must be an absolute number or a valid percentage",
"reason": "MaxUnavailableAnnotationSupported",
"status": "False",
"type": "observability.openshift.io/MaxUnavailableAnnotation"
}

----
+
NOTE: Conditions for annotations appear only when a value is invalid, with the status set to `False`. If there are no entries that mention
annotations, they were either not found or are valid.

==== Other Commands
====
* You can add an annotation using `oc patch` on the ClusterLogForwarder instance:
+
.example command
[source,bash]
----
oc patch obsclf <my-forwarder-name> --type='merge' -p '{"metadata":{"annotations":{"observability.openshift.io/max-unavailable-rollout":"20%"}}}'
----
* Alternatively, you can pull down the forwarder instance and make your changes locally:
+
[source,bash]
----
oc get obsclf <my-forwarder-name> -o yaml > my-forwarder.yaml
----
+
Then apply the local file:
+
[source,bash]
----
oc apply -f my-forwarder.yaml
----
* You could also use `oc edit` directly on the instance:
+
[source,bash]
----
oc edit obsclf <my-forwarder-name>
----
====

==== References
* Annotation Implemented: https://issues.redhat.com/browse/LOG-7196
* Knowledgebase Article: https://access.redhat.com/solutions/7121949
* Upstream Fix: https://github.com/vectordotdev/vector/pull/17095/files
7 changes: 5 additions & 2 deletions internal/collector/collector.go
@@ -67,6 +67,8 @@ type Factory struct {
ResourceNames *factory.ForwarderResourceNames
isDaemonset bool
LogLevel string
UseKubeCache bool
MaxUnavailable string
}

// CollectorResourceRequirements returns the resource requirements for a given collector implementation
Expand All @@ -85,7 +87,7 @@ func (f *Factory) Tolerations() []v1.Toleration {
return f.CollectorSpec.Tolerations
}

func New(confHash, clusterID string, collectorSpec *obs.CollectorSpec, secrets internalobs.Secrets, configMaps map[string]*v1.ConfigMap, forwarderSpec obs.ClusterLogForwarderSpec, resNames *factory.ForwarderResourceNames, isDaemonset bool, logLevel string) *Factory {
func New(confHash, clusterID string, collectorSpec *obs.CollectorSpec, secrets internalobs.Secrets, configMaps map[string]*v1.ConfigMap, forwarderSpec obs.ClusterLogForwarderSpec, resNames *factory.ForwarderResourceNames, isDaemonset bool, logLevel string, maxUnavailable string) *Factory {
if collectorSpec == nil {
collectorSpec = &obs.CollectorSpec{}
}
@@ -105,13 +107,14 @@ func New(confHash, clusterID string, collectorSpec *obs.CollectorSpec, secrets i
PodLabelVisitor: vector.PodLogExcludeLabel,
isDaemonset: isDaemonset,
LogLevel: logLevel,
MaxUnavailable: maxUnavailable,
}
return factory
}

func (f *Factory) NewDaemonSet(namespace, name string, trustedCABundle *v1.ConfigMap, tlsProfileSpec configv1.TLSProfileSpec) *apps.DaemonSet {
podSpec := f.NewPodSpec(trustedCABundle, f.ForwarderSpec, f.ClusterID, tlsProfileSpec, namespace)
ds := factory.NewDaemonSet(namespace, name, name, constants.CollectorName, constants.VectorName, *podSpec, f.CommonLabelInitializer, f.PodLabelVisitor)
ds := factory.NewDaemonSet(namespace, name, name, constants.CollectorName, constants.VectorName, f.MaxUnavailable, *podSpec, f.CommonLabelInitializer, f.PodLabelVisitor)
ds.Spec.Template.Annotations[constants.AnnotationSecretHash] = f.Secrets.Hash64a()
return ds
}
4 changes: 4 additions & 0 deletions internal/constants/annotations.go
@@ -17,4 +17,8 @@ const (
AnnotationOtlpOutputTechPreview = "observability.openshift.io/tech-preview-otlp-output"

AnnotationSecretHash = "observability.openshift.io/secret-hash"

// AnnotationMaxUnavailable (Deprecated) configures the maximum number of DaemonSet pods that can be unavailable during a rolling update.
// This can be an absolute number (e.g., 1) or a percentage (e.g., 10%). Default is 100%.
AnnotationMaxUnavailable = "observability.openshift.io/max-unavailable-rollout"
)
46 changes: 38 additions & 8 deletions internal/controller/observability/collector.go
@@ -21,6 +21,7 @@ import (
"github.com/openshift/cluster-logging-operator/internal/runtime/serviceaccount"
"github.com/openshift/cluster-logging-operator/internal/tls"
"github.com/openshift/cluster-logging-operator/internal/utils"
"github.com/openshift/cluster-logging-operator/internal/validations/observability"
corev1 "k8s.io/api/core/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
)
@@ -42,6 +43,9 @@ func ReconcileCollector(context internalcontext.ForwarderContext, pollInterval,
options = context.AdditionalContext
}

// Set rollout options based on annotation (LOG-7196)
SetMaxUnavailableRolloutOption(context.Forwarder.Annotations, options)

if internalobs.Outputs(context.Forwarder.Spec.Outputs).NeedServiceAccountToken() {
// temporarily create SA token until collector is capable of dynamically reloading a projected serviceaccount token
var sa *corev1.ServiceAccount
@@ -88,28 +92,40 @@ func ReconcileCollector(context internalcontext.ForwarderContext, pollInterval,

isDaemonSet := !internalobs.DeployAsDeployment(*context.Forwarder)
log.V(3).Info("Deploying as DaemonSet", "isDaemonSet", isDaemonSet)
factory := collector.New(collectorConfHash, context.ClusterID, context.Forwarder.Spec.Collector, context.Secrets, context.ConfigMaps, context.Forwarder.Spec, resourceNames, isDaemonSet, LogLevel(context.Forwarder.Annotations))
if err = factory.ReconcileCollectorConfig(context.Client, context.Reader, context.Forwarder.Namespace, collectorConfig, ownerRef); err != nil {
collectorFactory := collector.New(
collectorConfHash,
context.ClusterID,
context.Forwarder.Spec.Collector,
context.Secrets, context.ConfigMaps,
context.Forwarder.Spec,
resourceNames,
isDaemonSet,
LogLevel(context.Forwarder.Annotations),
factory.GetMaxUnavailableValue(options),
)

if err = collectorFactory.ReconcileCollectorConfig(context.Client, context.Reader, context.Forwarder.Namespace, collectorConfig, ownerRef); err != nil {
log.Error(err, "collector.ReconcileCollectorConfig")
return
}

reconcileWorkload := factory.ReconcileDaemonset
reconcileWorkload := collectorFactory.ReconcileDaemonset
if !isDaemonSet {
reconcileWorkload = factory.ReconcileDeployment
reconcileWorkload = collectorFactory.ReconcileDeployment
}

if err := reconcileWorkload(context.Client, context.Forwarder.Namespace, trustedCABundle, ownerRef); err != nil {
log.Error(err, "Error reconciling the deployment of the collector")
return err
}

if err := factory.ReconcileInputServices(context.Client, context.Reader, context.Forwarder.Namespace, ownerRef, factory.CommonLabelInitializer); err != nil {
if err := collectorFactory.ReconcileInputServices(context.Client, context.Reader, context.Forwarder.Namespace, ownerRef, collectorFactory.CommonLabelInitializer); err != nil {
log.Error(err, "collector.ReconcileInputServices")
return err
}

// Reconcile resources to support metrics gathering
if err := network.ReconcileService(context.Client, context.Forwarder.Namespace, resourceNames.CommonName, context.Forwarder.Name, constants.CollectorName, collector.MetricsPortName, resourceNames.SecretMetrics, collector.MetricsPort, ownerRef, factory.CommonLabelInitializer); err != nil {
if err := network.ReconcileService(context.Client, context.Forwarder.Namespace, resourceNames.CommonName, context.Forwarder.Name, constants.CollectorName, collector.MetricsPortName, resourceNames.SecretMetrics, collector.MetricsPort, ownerRef, collectorFactory.CommonLabelInitializer); err != nil {
log.Error(err, "collector.ReconcileService")
return err
}
@@ -122,12 +138,12 @@ func ReconcileCollector(context internalcontext.ForwarderContext, pollInterval,
return nil
}

func GenerateConfig(k8Client client.Client, spec obs.ClusterLogForwarder, resourceNames factory.ForwarderResourceNames, secrets internalobs.Secrets, op framework.Options) (config string, err error) {
func GenerateConfig(k8Client client.Client, clf obs.ClusterLogForwarder, resourceNames factory.ForwarderResourceNames, secrets internalobs.Secrets, op framework.Options) (config string, err error) {
tlsProfile, _ := tls.FetchAPIServerTlsProfile(k8Client)
op[framework.ClusterTLSProfileSpec] = tls.GetClusterTLSProfileSpec(tlsProfile)
//EvaluateAnnotationsForEnabledCapabilities(clusterRequest.Forwarder, op)
g := forwardergenerator.New()
generatedConfig, err := g.GenerateConf(secrets, spec.Spec, spec.Namespace, spec.Name, resourceNames, op)
generatedConfig, err := g.GenerateConf(secrets, clf.Spec, clf.Namespace, clf.Name, resourceNames, op)

if err != nil {
log.Error(err, "Unable to generate log configuration")
@@ -149,6 +165,11 @@ func EvaluateAnnotationsForEnabledCapabilities(annotations map[string]string, op
if strings.ToLower(value) == "true" {
options[generatorhelpers.EnableDebugOutput] = "true"
}
case constants.AnnotationMaxUnavailable:
// Matching the validate_annotations logic
if observability.IsPercentOrWholeNumber(value) {
options[framework.MaxUnavailableOption] = value
}
}
}
}
@@ -159,3 +180,12 @@ func LogLevel(annotations map[string]string) string {
}
return "warn"
}

func SetMaxUnavailableRolloutOption(annotations map[string]string, options framework.Options) {
if value, found := annotations[constants.AnnotationMaxUnavailable]; found {
if observability.IsPercentOrWholeNumber(value) {
log.V(3).Info("Max Unavailable annotation found")
options[framework.MaxUnavailableOption] = value
}
}
}
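
For context, a minimal hedged sketch of the `IsPercentOrWholeNumber` helper referenced above (the actual implementation lives in internal/validations/observability and may differ, e.g. by reusing Kubernetes intstr/validation utilities):

package observability

import (
	"strconv"
	"strings"
)

// IsPercentOrWholeNumber reports whether value is a non-negative whole number
// (e.g. "1") or a whole-number percentage no greater than 100 (e.g. "10%").
func IsPercentOrWholeNumber(value string) bool {
	n, err := strconv.Atoi(strings.TrimSuffix(value, "%"))
	if err != nil || n < 0 {
		return false
	}
	return !strings.HasSuffix(value, "%") || n <= 100
}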
4 changes: 4 additions & 0 deletions internal/controller/observability/collector_features_test.go
@@ -39,6 +39,10 @@ var _ = Describe("#EvaluateAnnotationsForEnabledCapabilities", func() {
Entry("enables debug for true", helpers.EnableDebugOutput, "true", AnnotationDebugOutput, "true"),
Entry("enables debug for True", helpers.EnableDebugOutput, "true", AnnotationDebugOutput, "True"),
Entry("disables debug for anything else", "", "", AnnotationDebugOutput, "abcdef"),

Entry("enables max-unavailable for value '10'", framework.MaxUnavailableOption, "10", AnnotationMaxUnavailable, "10"),
Entry("enables max-unavailable for value '99%'", framework.MaxUnavailableOption, "99%", AnnotationMaxUnavailable, "99%"),
Entry("disables max-unavailable option for anything not a number or percentage", "", "", AnnotationMaxUnavailable, "fluffy"),
)

})