Skip to content

Commit

Permalink
Fix race condition issue
Browse files (browse the repository at this point in the history)
  • Loading branch information
TommyLike committed Jul 26, 2019
1 parent 9d675f1 commit 07c5adb
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 1 deletion.
2 changes: 1 addition & 1 deletion Makefile
Original file line number | Diff line number | Diff line change
Expand Up @@ -68,7 +68,7 @@ generate-code:
./hack/update-gencode.sh

unit-test:
go list ./... | grep -v e2e | xargs go test -v -cover -covermode atomic -coverprofile coverage.txt
go list ./... | grep -v e2e | xargs go test -v -cover -covermode atomic -coverprofile coverage.txt -race

e2e-test-kind:
./hack/run-e2e-kind.sh
Expand Down
9 changes: 9 additions & 0 deletions pkg/controllers/job/job_controller_actions.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -253,6 +253,7 @@ func (cc *Controller) syncJob(jobInfo *apis.JobInfo, updateStatus state.UpdateSt

waitCreationGroup := sync.WaitGroup{}
waitCreationGroup.Add(len(podToCreate))
stateMutex := sync.Mutex{}
for _, pod := range podToCreate {
go func(pod *v1.Pod) {
defer waitCreationGroup.Done()
Expand All @@ -263,13 +264,17 @@ func (cc *Controller) syncJob(jobInfo *apis.JobInfo, updateStatus state.UpdateSt
// So gang-scheduling could schedule the Job successfully
glog.Errorf("Failed to create pod %s for Job %s, err %#v",
pod.Name, job.Name, err)
stateMutex.Lock()
creationErrs = append(creationErrs, fmt.Errorf("failed to create pod %s, err: %#v", pod.Name, err))
stateMutex.Unlock()
} else {
if err != nil && apierrors.IsAlreadyExists(err) {
cc.resyncTask(pod)
}

stateMutex.Lock()
classifyAndAddUpPodBaseOnPhase(newPod, &pending, &running, &succeeded, &failed, &unknown)
stateMutex.Unlock()
glog.V(3).Infof("Created Task <%s> of Job <%s/%s>",
pod.Name, job.Namespace, job.Name)
}
Expand Down Expand Up @@ -297,12 +302,16 @@ func (cc *Controller) syncJob(jobInfo *apis.JobInfo, updateStatus state.UpdateSt
// So gang-scheduling could schedule the Job successfully
glog.Errorf("Failed to delete pod %s for Job %s, err %#v",
pod.Name, job.Name, err)
stateMutex.Lock()
deletionErrs = append(deletionErrs, err)
stateMutex.Unlock()
cc.resyncTask(pod)
} else {
glog.V(3).Infof("Deleted Task <%s> of Job <%s/%s>",
pod.Name, job.Namespace, job.Name)
stateMutex.Lock()
terminating++
stateMutex.Unlock()
}
}(pod)
}
Expand Down

0 comments on commit 07c5adb

Please sign in to comment.