Skip to content

Commit

Permalink
fix: Workflow retry should also reset the selected nodes (#9156)
Browse files: browse the repository at this point in the history
Signed-off-by: Yuan Tang <[email protected]>
  • Loading branch information
terrytangyuan authored Jul 15, 2022
1 parent 559b59c commit 42729ff
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 8 deletions.
4 changes: 4 additions & 0 deletions workflow/util/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -796,6 +796,10 @@ func FormulateRetryWorkflow(ctx context.Context, wf *wfv1.Workflow, restartSucce
}
continue
}
if doForceResetNode {
newNode := node.DeepCopy()
newWF.Status.Nodes[newNode.ID] = resetNode(*newNode)
}
case wfv1.NodeError, wfv1.NodeFailed, wfv1.NodeOmitted:
if !strings.HasPrefix(node.Name, onExitNodeName) && (node.Type == wfv1.NodeTypeDAG || node.Type == wfv1.NodeTypeTaskGroup || node.Type == wfv1.NodeTypeStepGroup) {
newNode := node.DeepCopy()
Expand Down
50 changes: 42 additions & 8 deletions workflow/util/util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -887,27 +887,61 @@ func TestFormulateRetryWorkflow(t *testing.T) {

}
})
t.Run("Nested DAG", func(t *testing.T) {
t.Run("Nested DAG with Non-group Node Selected", func(t *testing.T) {
wf := &wfv1.Workflow{
ObjectMeta: metav1.ObjectMeta{
Name: "my-nested-dag",
Name: "my-nested-dag-1",
Labels: map[string]string{},
},
Status: wfv1.WorkflowStatus{
Phase: wfv1.WorkflowFailed,
Nodes: map[string]wfv1.NodeStatus{
"1": {ID: "1", Phase: wfv1.NodeSucceeded, Type: wfv1.NodeTypeTaskGroup},
"2": {ID: "2", Phase: wfv1.NodeSucceeded, Type: wfv1.NodeTypeTaskGroup, BoundaryID: "1"},
"3": {ID: "3", Phase: wfv1.NodeSucceeded, Type: wfv1.NodeTypePod, BoundaryID: "2"},
"4": {ID: "4", Phase: wfv1.NodeFailed, Type: wfv1.NodeTypePod, BoundaryID: "1"}},
"my-nested-dag-1": {ID: "my-nested-dag-1", Phase: wfv1.NodeSucceeded, Type: wfv1.NodeTypeTaskGroup},
"1": {ID: "1", Phase: wfv1.NodeSucceeded, Type: wfv1.NodeTypeTaskGroup, BoundaryID: "my-nested-dag-1"},
"2": {ID: "2", Phase: wfv1.NodeSucceeded, Type: wfv1.NodeTypeTaskGroup, BoundaryID: "1"},
"3": {ID: "3", Phase: wfv1.NodeSucceeded, Type: wfv1.NodeTypePod, BoundaryID: "2"},
"4": {ID: "4", Phase: wfv1.NodeFailed, Type: wfv1.NodeTypePod, BoundaryID: "1"}},
},
}
_, err := wfClient.Create(ctx, wf, metav1.CreateOptions{})
assert.NoError(t, err)
wf, _, err = FormulateRetryWorkflow(ctx, wf, false, "")
wf, _, err = FormulateRetryWorkflow(ctx, wf, true, "id=3")
if assert.NoError(t, err) {
if assert.Len(t, wf.Status.Nodes, 5) {
assert.Equal(t, wfv1.NodeSucceeded, wf.Status.Nodes["my-nested-dag-1"].Phase)
// These should all be running: the selected node #3 is bounded by node #2, which is in turn bounded by node #1, so the reset propagates up to node #1.
assert.Equal(t, wfv1.NodeRunning, wf.Status.Nodes["1"].Phase)
assert.Equal(t, wfv1.NodeRunning, wf.Status.Nodes["2"].Phase)
assert.Equal(t, wfv1.NodeRunning, wf.Status.Nodes["3"].Phase)
assert.Equal(t, wfv1.NodeRunning, wf.Status.Nodes["4"].Phase)
}
}
})
t.Run("Nested DAG without Node Selected", func(t *testing.T) {
wf := &wfv1.Workflow{
ObjectMeta: metav1.ObjectMeta{
Name: "my-nested-dag-2",
Labels: map[string]string{},
},
Status: wfv1.WorkflowStatus{
Phase: wfv1.WorkflowFailed,
Nodes: map[string]wfv1.NodeStatus{
"my-nested-dag-2": {ID: "my-nested-dag-2", Phase: wfv1.NodeSucceeded, Type: wfv1.NodeTypeTaskGroup},
"1": {ID: "1", Phase: wfv1.NodeSucceeded, Type: wfv1.NodeTypeTaskGroup, BoundaryID: "my-nested-dag-2"},
"2": {ID: "2", Phase: wfv1.NodeSucceeded, Type: wfv1.NodeTypeTaskGroup, BoundaryID: "1"},
"3": {ID: "3", Phase: wfv1.NodeSucceeded, Type: wfv1.NodeTypePod, BoundaryID: "2"},
"4": {ID: "4", Phase: wfv1.NodeFailed, Type: wfv1.NodeTypePod, BoundaryID: "1"}},
},
}
_, err := wfClient.Create(ctx, wf, metav1.CreateOptions{})
assert.NoError(t, err)
wf, _, err = FormulateRetryWorkflow(ctx, wf, true, "")
if assert.NoError(t, err) {
if assert.Len(t, wf.Status.Nodes, 4) {
if assert.Len(t, wf.Status.Nodes, 5) {
assert.Equal(t, wfv1.NodeSucceeded, wf.Status.Nodes["my-nested-dag-2"].Phase)
// This should be running since it's node #4's parent node.
assert.Equal(t, wfv1.NodeRunning, wf.Status.Nodes["1"].Phase)
// This should be running since it's node #1's child node and node #1 is being retried.
assert.Equal(t, wfv1.NodeRunning, wf.Status.Nodes["2"].Phase)
assert.Equal(t, wfv1.NodeSucceeded, wf.Status.Nodes["3"].Phase)
assert.Equal(t, wfv1.NodeRunning, wf.Status.Nodes["4"].Phase)
Expand Down

0 comments on commit 42729ff

Please sign in to comment.