Skip to content

Commit

Permalink
TEP-0121: Reconciler Implementation
Browse files Browse the repository at this point in the history
Prior to this commit, `retries` logic for TaskRun is handled by
Tekton `PipelineRun` reconciler. This commit delegates the retries
implementation for `TaskRun` to the `TaskRun` reconciler.

The major change is we stopped relying on `len(retriesStatus)` to
decide if a target `TaskRun` failed or not. Instead, we use status
`ConditionSuceeded` to gate the completion of a `TaskRun`. Even a
`TaskRun` failed on one execution, as long as it has remaining
retries, the TaskRun won't be stored in etcd with its status set
as `Failed`. Instead, the status will be:

```yaml
Type: Succeeded
Status: Unknown
Reason: ToBeRetried
```
  • Loading branch information
XinruZhang committed Dec 8, 2022
1 parent 22d197e commit 13aab95
Show file tree
Hide file tree
Showing 10 changed files with 469 additions and 529 deletions.
10 changes: 10 additions & 0 deletions pkg/pod/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,16 @@ func DidTaskRunFail(pod *corev1.Pod) bool {
return f
}

// IsPodArchived indicates if a pod is archived in the retriesStatus.
func IsPodArchived(pod *corev1.Pod, trs *v1beta1.TaskRunStatus) bool {
for _, retryStatus := range trs.RetriesStatus {
if retryStatus.PodName == pod.GetName() {
return true
}
}
return false
}

func areStepsComplete(pod *corev1.Pod) bool {
stepsComplete := len(pod.Status.ContainerStatuses) > 0 && pod.Status.Phase == corev1.PodRunning
for _, s := range pod.Status.ContainerStatuses {
Expand Down
36 changes: 36 additions & 0 deletions pkg/pod/status_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1551,6 +1551,42 @@ func TestMarkStatusSuccess(t *testing.T) {
}
}

func TestIsPodArchived(t *testing.T) {
for _, tc := range []struct {
name string
podName string
retriesStatus []v1beta1.TaskRunStatus
wantIsArchived bool
}{{
name: "Pod is not in the retriesStatus",
podName: "pod",
retriesStatus: []v1beta1.TaskRunStatus{},
wantIsArchived: false,
}, {
name: "Pod is in the retriesStatus",
podName: "pod",
retriesStatus: []v1beta1.TaskRunStatus{{
TaskRunStatusFields: v1beta1.TaskRunStatusFields{
PodName: "pod",
}},
},
wantIsArchived: true,
}} {
t.Run(tc.name, func(t *testing.T) {
trs := v1beta1.TaskRunStatus{
TaskRunStatusFields: v1beta1.TaskRunStatusFields{
PodName: "pod",
RetriesStatus: tc.retriesStatus,
},
}
gotArchived := IsPodArchived(&corev1.Pod{ObjectMeta: metav1.ObjectMeta{Name: tc.podName}}, &trs)
if tc.wantIsArchived != gotArchived {
t.Errorf("IsPodArchived(): %v, expect %v", gotArchived, tc.wantIsArchived)
}
})
}
}

func statusRunning() duckv1.Status {
var trs v1beta1.TaskRunStatus
markStatusRunning(&trs, v1beta1.TaskRunReasonRunning.String(), "Not all Steps in the Task have finished executing")
Expand Down
31 changes: 2 additions & 29 deletions pkg/reconciler/pipelinerun/pipelinerun.go
Original file line number Diff line number Diff line change
Expand Up @@ -850,26 +850,10 @@ func (c *Reconciler) createTaskRuns(ctx context.Context, rpt *resources.Resolved
func (c *Reconciler) createTaskRun(ctx context.Context, taskRunName string, params []v1beta1.Param, rpt *resources.ResolvedPipelineTask, pr *v1beta1.PipelineRun, storageBasePath string) (*v1beta1.TaskRun, error) {
logger := logging.FromContext(ctx)

tr, _ := c.taskRunLister.TaskRuns(pr.Namespace).Get(taskRunName)
if tr != nil {
// retry should happen only when the taskrun has failed
if !tr.Status.GetCondition(apis.ConditionSucceeded).IsFalse() {
return tr, nil
}
// Don't modify the lister cache's copy.
tr = tr.DeepCopy()
// is a retry
addRetryHistory(tr)
clearStatus(tr)
tr.Status.MarkResourceOngoing("", "")
logger.Infof("Updating taskrun %s with cleared status and retry history (length: %d).", tr.GetName(), len(tr.Status.RetriesStatus))
return c.PipelineClientSet.TektonV1beta1().TaskRuns(pr.Namespace).UpdateStatus(ctx, tr, metav1.UpdateOptions{})
}

rpt.PipelineTask = resources.ApplyPipelineTaskContexts(rpt.PipelineTask)
taskRunSpec := pr.GetTaskRunSpec(rpt.PipelineTask.Name)
params = append(params, rpt.PipelineTask.Params...)
tr = &v1beta1.TaskRun{
tr := &v1beta1.TaskRun{
ObjectMeta: metav1.ObjectMeta{
Name: taskRunName,
Namespace: pr.Namespace,
Expand All @@ -878,6 +862,7 @@ func (c *Reconciler) createTaskRun(ctx context.Context, taskRunName string, para
Annotations: combineTaskRunAndTaskSpecAnnotations(pr, rpt.PipelineTask),
},
Spec: v1beta1.TaskRunSpec{
Retries: rpt.PipelineTask.Retries,
Params: params,
ServiceAccountName: taskRunSpec.TaskServiceAccountName,
PodTemplate: taskRunSpec.TaskPodTemplate,
Expand Down Expand Up @@ -1092,18 +1077,6 @@ func combinedSubPath(workspaceSubPath string, pipelineTaskSubPath string) string
return filepath.Join(workspaceSubPath, pipelineTaskSubPath)
}

func addRetryHistory(tr *v1beta1.TaskRun) {
newStatus := *tr.Status.DeepCopy()
newStatus.RetriesStatus = nil
tr.Status.RetriesStatus = append(tr.Status.RetriesStatus, newStatus)
}

func clearStatus(tr *v1beta1.TaskRun) {
tr.Status.StartTime = nil
tr.Status.CompletionTime = nil
tr.Status.PodName = ""
}

func getTaskrunAnnotations(pr *v1beta1.PipelineRun) map[string]string {
// Propagate annotations from PipelineRun to TaskRun.
annotations := make(map[string]string, len(pr.ObjectMeta.Annotations)+1)
Expand Down
124 changes: 24 additions & 100 deletions pkg/reconciler/pipelinerun/pipelinerun_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,7 @@ spec:
value: test-pipeline
- name: contextRetriesParam
value: "5"
retries: 5
resources:
inputs:
- name: workspace
Expand Down Expand Up @@ -3335,103 +3336,6 @@ spec:
}
}

// TestReconcileWithTimeoutAndRetry runs "Reconcile" against pipelines with
// retries and timeout settings, and status that represents different number of
// retries already performed. It verifies the reconciled status and events
// generated
func TestReconcileWithTimeoutAndRetry(t *testing.T) {
for _, tc := range []struct {
name string
retries int
conditionSucceeded corev1.ConditionStatus
wantEvents []string
}{{
name: "One try has to be done",
retries: 1,
conditionSucceeded: corev1.ConditionFalse,
wantEvents: []string{
"Warning Failed PipelineRun \"test-pipeline-retry-run-with-timeout\" failed to finish within",
},
}, {
name: "No more retries are needed",
retries: 2,
conditionSucceeded: corev1.ConditionUnknown,
wantEvents: []string{
"Warning Failed PipelineRun \"test-pipeline-retry-run-with-timeout\" failed to finish within",
},
}} {
t.Run(tc.name, func(t *testing.T) {
ps := []*v1beta1.Pipeline{parse.MustParseV1beta1Pipeline(t, fmt.Sprintf(`
metadata:
name: test-pipeline-retry
namespace: foo
spec:
tasks:
- name: hello-world-1
retries: %d
taskRef:
name: hello-world
`, tc.retries))}
prs := []*v1beta1.PipelineRun{parse.MustParseV1beta1PipelineRun(t, `
metadata:
name: test-pipeline-retry-run-with-timeout
namespace: foo
spec:
pipelineRef:
name: test-pipeline-retry
serviceAccountName: test-sa
timeout: 12h0m0s
status:
startTime: "2021-12-31T00:00:00Z"
`)}

ts := []*v1beta1.Task{
simpleHelloWorldTask,
}
trs := []*v1beta1.TaskRun{parse.MustParseV1beta1TaskRun(t, `
metadata:
name: hello-world-1
namespace: foo
status:
conditions:
- status: "False"
type: Succeeded
podName: my-pod-name
retriesStatus:
- conditions:
- status: "False"
type: Succeeded
`)}

prtrs := &v1beta1.PipelineRunTaskRunStatus{
PipelineTaskName: "hello-world-1",
Status: &trs[0].Status,
}
prs[0].Status.TaskRuns = make(map[string]*v1beta1.PipelineRunTaskRunStatus)
prs[0].Status.TaskRuns["hello-world-1"] = prtrs

d := test.Data{
PipelineRuns: prs,
Pipelines: ps,
Tasks: ts,
TaskRuns: trs,
}
prt := newPipelineRunTest(d, t)
defer prt.Cancel()

reconciledRun, _ := prt.reconcileRun("foo", "test-pipeline-retry-run-with-timeout", []string{}, false)

if len(reconciledRun.Status.TaskRuns["hello-world-1"].Status.RetriesStatus) != tc.retries {
t.Fatalf(" %d retries expected but got %d ", tc.retries, len(reconciledRun.Status.TaskRuns["hello-world-1"].Status.RetriesStatus))
}

if status := reconciledRun.Status.TaskRuns["hello-world-1"].Status.GetCondition(apis.ConditionSucceeded).Status; status != tc.conditionSucceeded {
t.Fatalf("Succeeded expected to be %s but is %s", tc.conditionSucceeded, status)
}
})
}
}

// TestReconcileAndPropagateCustomPipelineTaskRunSpec tests that custom PipelineTaskRunSpec declared
// in PipelineRun is propagated to created TaskRuns
func TestReconcileAndPropagateCustomPipelineTaskRunSpec(t *testing.T) {
Expand Down Expand Up @@ -9464,6 +9368,7 @@ spec:
"pr", "p", "platforms-and-browsers", false),
`
spec:
retries: 1
params:
- name: platform
value: linux
Expand All @@ -9478,12 +9383,17 @@ status:
conditions:
- type: Succeeded
status: "False"
retriesStatus:
- conditions:
- status: "False"
type: Succeeded
`),
mustParseTaskRunWithObjectMeta(t,
taskRunObjectMeta("pr-platforms-and-browsers-1", "foo",
"pr", "p", "platforms-and-browsers", false),
`
spec:
retries: 1
params:
- name: platform
value: mac
Expand Down Expand Up @@ -9598,6 +9508,7 @@ status:
"pr", "p", "platforms-and-browsers", false),
`
spec:
retries: 1
params:
- name: platform
value: linux
Expand All @@ -9611,7 +9522,7 @@ spec:
status:
conditions:
- type: Succeeded
status: "Unknown"
status: "False"
retriesStatus:
- conditions:
- status: "False"
Expand All @@ -9622,6 +9533,7 @@ status:
"pr", "p", "platforms-and-browsers", false),
`
spec:
retries: 1
params:
- name: platform
value: mac
Expand All @@ -9646,6 +9558,7 @@ status:
"pr", "p", "platforms-and-browsers", false),
`
spec:
retries: 1
params:
- name: platform
value: linux
Expand All @@ -9659,13 +9572,18 @@ spec:
status:
conditions:
- type: Succeeded
status: "False"
status: "Unknown"
retriesStatus:
- conditions:
- status: "False"
type: Succeeded
`),
mustParseTaskRunWithObjectMeta(t,
taskRunObjectMeta("pr-platforms-and-browsers-1", "foo",
"pr", "p", "platforms-and-browsers", false),
`
spec:
retries: 1
params:
- name: platform
value: mac
Expand All @@ -9679,7 +9597,11 @@ spec:
status:
conditions:
- type: Succeeded
status: "False"
status: "Unknown"
retriesStatus:
- conditions:
- status: "False"
type: Succeeded
`),
},
prs: []*v1beta1.PipelineRun{
Expand Down Expand Up @@ -9780,6 +9702,7 @@ status:
"pr", "p", "platforms-and-browsers", false),
`
spec:
retries: 1
params:
- name: platform
value: linux
Expand All @@ -9804,6 +9727,7 @@ status:
"pr", "p", "platforms-and-browsers", false),
`
spec:
retries: 1
params:
- name: platform
value: mac
Expand Down
Loading

0 comments on commit 13aab95

Please sign in to comment.