Skip to content

Commit 1854839

Browse files
authored
Merge pull request kubernetes#126067 from tenzen-y/implement-job-success-policy-e2e
Graduate the JobSuccessPolicy to Beta
2 parents 2171bcb + 0d4f18b commit 1854839

File tree

14 files changed

+374
-62
lines changed

14 files changed

+374
-62
lines changed

api/openapi-spec/swagger.json

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

api/openapi-spec/v3/apis__batch__v1_openapi.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -388,7 +388,7 @@
388388
"$ref": "#/components/schemas/io.k8s.api.batch.v1.SuccessPolicy"
389389
}
390390
],
391-
"description": "successPolicy specifies the policy when the Job can be declared as succeeded. If empty, the default behavior applies - the Job is declared as succeeded only when the number of succeeded pods equals to the completions. When the field is specified, it must be immutable and works only for the Indexed Jobs. Once the Job meets the SuccessPolicy, the lingering pods are terminated.\n\nThis field is alpha-level. To use this field, you must enable the `JobSuccessPolicy` feature gate (disabled by default)."
391+
"description": "successPolicy specifies the policy when the Job can be declared as succeeded. If empty, the default behavior applies - the Job is declared as succeeded only when the number of succeeded pods equals to the completions. When the field is specified, it must be immutable and works only for the Indexed Jobs. Once the Job meets the SuccessPolicy, the lingering pods are terminated.\n\nThis field is beta-level. To use this field, you must enable the `JobSuccessPolicy` feature gate (enabled by default)."
392392
},
393393
"suspend": {
394394
"description": "suspend specifies whether the Job controller should create Pods or not. If a Job is created with suspend set to true, no Pods are created by the Job controller. If a Job is suspended after creation (i.e. the flag goes from false to true), the Job controller will delete all active Pods associated with this Job. Users must design their workload to gracefully handle this. Suspending a Job will reset the StartTime field of the Job, effectively resetting the ActiveDeadlineSeconds timer too. Defaults to false.",

pkg/apis/batch/types.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -339,8 +339,8 @@ type JobSpec struct {
339339
// When the field is specified, it must be immutable and works only for the Indexed Jobs.
340340
// Once the Job meets the SuccessPolicy, the lingering pods are terminated.
341341
//
342-
// This field is alpha-level. To use this field, you must enable the
343-
// `JobSuccessPolicy` feature gate (disabled by default).
342+
// This field is beta-level. To use this field, you must enable the
343+
// `JobSuccessPolicy` feature gate (enabled by default).
344344
// +optional
345345
SuccessPolicy *SuccessPolicy
346346

pkg/controller/job/job_controller.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -988,7 +988,12 @@ func (jm *Controller) newSuccessCondition() *batch.JobCondition {
988988
if delayTerminalCondition() {
989989
cType = batch.JobSuccessCriteriaMet
990990
}
991-
return newCondition(cType, v1.ConditionTrue, "", "", jm.clock.Now())
991+
var reason, message string
992+
if feature.DefaultFeatureGate.Enabled(features.JobSuccessPolicy) {
993+
reason = batch.JobReasonCompletionsReached
994+
message = "Reached expected number of succeeded pods"
995+
}
996+
return newCondition(cType, v1.ConditionTrue, reason, message, jm.clock.Now())
992997
}
993998

994999
func delayTerminalCondition() bool {
@@ -1419,7 +1424,7 @@ func (jm *Controller) recordJobFinished(job *batch.Job, finishedCond *batch.JobC
14191424
jm.recorder.Event(job, v1.EventTypeWarning, "TooManySucceededPods", "Too many succeeded pods running after completion count reached")
14201425
}
14211426
jm.recorder.Event(job, v1.EventTypeNormal, "Completed", "Job completed")
1422-
metrics.JobFinishedNum.WithLabelValues(completionMode, "succeeded", "").Inc()
1427+
metrics.JobFinishedNum.WithLabelValues(completionMode, "succeeded", finishedCond.Reason).Inc()
14231428
} else {
14241429
jm.recorder.Event(job, v1.EventTypeWarning, finishedCond.Reason, finishedCond.Message)
14251430
metrics.JobFinishedNum.WithLabelValues(completionMode, "failed", finishedCond.Reason).Inc()

pkg/controller/job/job_controller_test.go

Lines changed: 67 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4991,6 +4991,45 @@ func TestSyncJobWithJobSuccessPolicy(t *testing.T) {
49914991
},
49924992
},
49934993
},
4994+
"job without successPolicy; jobSuccessPolicy is enabled; job got SuccessCriteriaMet and Completion with CompletionsReached reason conditions": {
4995+
enableJobSuccessPolicy: true,
4996+
enableJobManagedBy: true,
4997+
job: batch.Job{
4998+
TypeMeta: validTypeMeta,
4999+
ObjectMeta: validObjectMeta,
5000+
Spec: batch.JobSpec{
5001+
Selector: validSelector,
5002+
Template: validTemplate,
5003+
CompletionMode: ptr.To(batch.IndexedCompletion),
5004+
Completions: ptr.To[int32](1),
5005+
Parallelism: ptr.To[int32](1),
5006+
BackoffLimit: ptr.To[int32](math.MaxInt32),
5007+
},
5008+
},
5009+
pods: []v1.Pod{
5010+
*buildPod().uid("a1").index("0").phase(v1.PodSucceeded).trackingFinalizer().Pod,
5011+
},
5012+
wantStatus: batch.JobStatus{
5013+
Failed: 0,
5014+
Succeeded: 1,
5015+
CompletedIndexes: "0",
5016+
UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
5017+
Conditions: []batch.JobCondition{
5018+
{
5019+
Type: batch.JobSuccessCriteriaMet,
5020+
Status: v1.ConditionTrue,
5021+
Reason: batch.JobReasonCompletionsReached,
5022+
Message: "Reached expected number of succeeded pods",
5023+
},
5024+
{
5025+
Type: batch.JobComplete,
5026+
Status: v1.ConditionTrue,
5027+
Reason: batch.JobReasonCompletionsReached,
5028+
Message: "Reached expected number of succeeded pods",
5029+
},
5030+
},
5031+
},
5032+
},
49945033
"when the JobSuccessPolicy is disabled, the Job never got SuccessCriteriaMet condition even if the Job has the successPolicy field": {
49955034
job: batch.Job{
49965035
TypeMeta: validTypeMeta,
@@ -5132,12 +5171,16 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
51325171
UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
51335172
Conditions: []batch.JobCondition{
51345173
{
5135-
Type: batch.JobSuccessCriteriaMet,
5136-
Status: v1.ConditionTrue,
5174+
Type: batch.JobSuccessCriteriaMet,
5175+
Status: v1.ConditionTrue,
5176+
Reason: batch.JobReasonCompletionsReached,
5177+
Message: "Reached expected number of succeeded pods",
51375178
},
51385179
{
5139-
Type: batch.JobComplete,
5140-
Status: v1.ConditionTrue,
5180+
Type: batch.JobComplete,
5181+
Status: v1.ConditionTrue,
5182+
Reason: batch.JobReasonCompletionsReached,
5183+
Message: "Reached expected number of succeeded pods",
51415184
},
51425185
},
51435186
},
@@ -7066,8 +7109,10 @@ func TestJobBackoffForOnFailure(t *testing.T) {
70667109
expectedFailed: 0,
70677110
expectedConditions: []batch.JobCondition{
70687111
{
7069-
Type: batch.JobComplete,
7070-
Status: v1.ConditionTrue,
7112+
Type: batch.JobComplete,
7113+
Status: v1.ConditionTrue,
7114+
Reason: batch.JobReasonCompletionsReached,
7115+
Message: "Reached expected number of succeeded pods",
70717116
},
70727117
},
70737118
},
@@ -7085,12 +7130,16 @@ func TestJobBackoffForOnFailure(t *testing.T) {
70857130
expectedFailed: 0,
70867131
expectedConditions: []batch.JobCondition{
70877132
{
7088-
Type: batch.JobSuccessCriteriaMet,
7089-
Status: v1.ConditionTrue,
7133+
Type: batch.JobSuccessCriteriaMet,
7134+
Status: v1.ConditionTrue,
7135+
Reason: batch.JobReasonCompletionsReached,
7136+
Message: "Reached expected number of succeeded pods",
70907137
},
70917138
{
7092-
Type: batch.JobComplete,
7093-
Status: v1.ConditionTrue,
7139+
Type: batch.JobComplete,
7140+
Status: v1.ConditionTrue,
7141+
Reason: batch.JobReasonCompletionsReached,
7142+
Message: "Reached expected number of succeeded pods",
70947143
},
70957144
},
70967145
},
@@ -7108,12 +7157,16 @@ func TestJobBackoffForOnFailure(t *testing.T) {
71087157
expectedFailed: 0,
71097158
expectedConditions: []batch.JobCondition{
71107159
{
7111-
Type: batch.JobSuccessCriteriaMet,
7112-
Status: v1.ConditionTrue,
7160+
Type: batch.JobSuccessCriteriaMet,
7161+
Status: v1.ConditionTrue,
7162+
Reason: batch.JobReasonCompletionsReached,
7163+
Message: "Reached expected number of succeeded pods",
71137164
},
71147165
{
7115-
Type: batch.JobComplete,
7116-
Status: v1.ConditionTrue,
7166+
Type: batch.JobComplete,
7167+
Status: v1.ConditionTrue,
7168+
Reason: batch.JobReasonCompletionsReached,
7169+
Message: "Reached expected number of succeeded pods",
71177170
},
71187171
},
71197172
},

pkg/controller/job/metrics/metrics.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,12 +55,14 @@ var (
5555
},
5656
[]string{"completion_mode", "result", "action"},
5757
)
58-
// JobFinishedNum tracks the number of Jobs that finish. Empty reason label
59-
// is used to count successful jobs.
58+
// JobFinishedNum tracks the number of Jobs that finish.
59+
// TODO: Once the JobSuccessPolicy feature gate is removed, remove the "" reason label and the comment about it.
60+
// When the JobSuccessPolicy feature gate is disabled, an empty reason label is used to count successful jobs.
61+
// Otherwise, "CompletionsReached" reason label is used to count successful jobs.
6062
// Possible label values:
6163
// completion_mode: Indexed, NonIndexed
6264
// result: failed, succeeded
63-
// reason: "BackoffLimitExceeded", "DeadlineExceeded", "PodFailurePolicy", "FailedIndexes", "MaxFailedIndexesExceeded", ""
65+
// reason: "BackoffLimitExceeded", "DeadlineExceeded", "PodFailurePolicy", "FailedIndexes", "MaxFailedIndexesExceeded", "SuccessPolicy", "CompletionsReached", ""
6466
JobFinishedNum = metrics.NewCounterVec(
6567
&metrics.CounterOpts{
6668
Subsystem: JobControllerSubsystem,

pkg/features/kube_features.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,7 @@ const (
350350
// owner: @tenzen-y
351351
// kep: https://kep.k8s.io/3998
352352
// alpha: v1.30
353+
// beta: v1.31
353354
//
354355
// Allow users to specify when a Job can be declared as succeeded
355356
// based on the set of succeeded pods.
@@ -1066,7 +1067,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
10661067

10671068
JobPodReplacementPolicy: {Default: true, PreRelease: featuregate.Beta},
10681069

1069-
JobSuccessPolicy: {Default: false, PreRelease: featuregate.Alpha},
1070+
JobSuccessPolicy: {Default: true, PreRelease: featuregate.Beta},
10701071

10711072
KubeletCgroupDriverFromCRI: {Default: true, PreRelease: featuregate.Beta},
10721073

pkg/generated/openapi/zz_generated.openapi.go

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

staging/src/k8s.io/api/batch/v1/generated.proto

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

staging/src/k8s.io/api/batch/v1/types.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -347,8 +347,8 @@ type JobSpec struct {
347347
// When the field is specified, it must be immutable and works only for the Indexed Jobs.
348348
// Once the Job meets the SuccessPolicy, the lingering pods are terminated.
349349
//
350-
// This field is alpha-level. To use this field, you must enable the
351-
// `JobSuccessPolicy` feature gate (disabled by default).
350+
// This field is beta-level. To use this field, you must enable the
351+
// `JobSuccessPolicy` feature gate (enabled by default).
352352
// +optional
353353
SuccessPolicy *SuccessPolicy `json:"successPolicy,omitempty" protobuf:"bytes,16,opt,name=successPolicy"`
354354

@@ -649,8 +649,13 @@ const (
649649
// JobReasonSuccessPolicy reason indicates a SuccessCriteriaMet condition is added due to
650650
// the Job meeting its successPolicy.
651651
// https://kep.k8s.io/3998
652-
// This is currently an alpha field.
652+
// This is currently a beta field.
653653
JobReasonSuccessPolicy string = "SuccessPolicy"
654+
// JobReasonCompletionsReached reason indicates a SuccessCriteriaMet condition is added due to
655+
// the number of succeeded Job pods reaching the Job's completions.
656+
// https://kep.k8s.io/3998
657+
// This is currently a beta field.
658+
JobReasonCompletionsReached string = "CompletionsReached"
654659
)
655660

656661
// JobCondition describes current state of a job.

0 commit comments

Comments
 (0)