Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

count unknown pod as [Unknown] in job status #289

Merged
merged 1 commit into from
Jul 5, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions pkg/apis/batch/v1alpha1/job.go
Original file line number Diff line number Diff line change
Expand Up @@ -285,15 +285,19 @@ type JobStatus struct {
// +optional
Terminating int32 `json:"terminating,omitempty" protobuf:"bytes,7,opt,name=terminating"`

// The number of pods which reached phase Unknown.
// +optional
Unknown int32 `json:"unknown,omitempty" protobuf:"bytes,8,opt,name=unknown"`

//Current version of job
Version int32 `json:"version,omitempty" protobuf:"bytes,8,opt,name=version"`
Version int32 `json:"version,omitempty" protobuf:"bytes,9,opt,name=version"`

// The number of Job retries.
// +optional
RetryCount int32 `json:"retryCount,omitempty" protobuf:"bytes,9,opt,name=retryCount"`
RetryCount int32 `json:"retryCount,omitempty" protobuf:"bytes,10,opt,name=retryCount"`

// The resources that controlled by this job, e.g. Service, ConfigMap
ControlledResources map[string]string `json:"controlledResources,omitempty" protobuf:"bytes,8,opt,name=controlledResources"`
ControlledResources map[string]string `json:"controlledResources,omitempty" protobuf:"bytes,11,opt,name=controlledResources"`
}

// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
Expand Down
10 changes: 6 additions & 4 deletions pkg/cli/job/list.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ const (
Version string = "Version"
// Failed failed
Failed string = "Failed"
// Unknown pod
Unknown string = "Unknown"
// RetryCount retry count
RetryCount string = "RetryCount"
// JobType job type
Expand Down Expand Up @@ -110,8 +112,8 @@ func ListJobs() error {
// PrintJobs prints all jobs details
func PrintJobs(jobs *v1alpha1.JobList, writer io.Writer) {
maxNameLen := getMaxNameLen(jobs)
_, err := fmt.Fprintf(writer, fmt.Sprintf("%%-%ds%%-25s%%-12s%%-12s%%-12s%%-6s%%-10s%%-10s%%-12s%%-10s%%-12s\n", maxNameLen),
Name, Creation, Phase, JobType, Replicas, Min, Pending, Running, Succeeded, Failed, RetryCount)
_, err := fmt.Fprintf(writer, fmt.Sprintf("%%-%ds%%-25s%%-12s%%-12s%%-12s%%-6s%%-10s%%-10s%%-12s%%-10s%%-12s%%-10s\n", maxNameLen),
Name, Creation, Phase, JobType, Replicas, Min, Pending, Running, Succeeded, Failed, Unknown, RetryCount)
if err != nil {
fmt.Printf("Failed to print list command result: %s.\n", err)
}
Expand All @@ -131,9 +133,9 @@ func PrintJobs(jobs *v1alpha1.JobList, writer io.Writer) {
if jobType == "" {
jobType = "Batch"
}
_, err = fmt.Fprintf(writer, fmt.Sprintf("%%-%ds%%-25s%%-12s%%-12s%%-12d%%-6d%%-10d%%-10d%%-12d%%-10d%%-12d\n", maxNameLen),
_, err = fmt.Fprintf(writer, fmt.Sprintf("%%-%ds%%-25s%%-12s%%-12s%%-12d%%-6d%%-10d%%-10d%%-12d%%-10d%%-12d%%-10d\n", maxNameLen),
job.Name, job.CreationTimestamp.Format("2006-01-02 15:04:05"), job.Status.State.Phase, jobType, replicas,
job.Status.MinAvailable, job.Status.Pending, job.Status.Running, job.Status.Succeeded, job.Status.Failed, job.Status.RetryCount)
job.Status.MinAvailable, job.Status.Pending, job.Status.Running, job.Status.Succeeded, job.Status.Failed, job.Status.Unknown, job.Status.RetryCount)
if err != nil {
fmt.Printf("Failed to print list command result: %s.\n", err)
}
Expand Down
3 changes: 3 additions & 0 deletions pkg/cli/job/view.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,9 @@ func PrintJobInfo(job *v1alpha1.Job, writer io.Writer) {
if job.Status.Terminating > 0 {
WriteLine(writer, Level1, "Terminating: \t%d\n", job.Status.Terminating)
}
if job.Status.Unknown > 0 {
WriteLine(writer, Level1, "Unknown: \t%d\n", job.Status.Unknown)
}
if job.Status.RetryCount > 0 {
WriteLine(writer, Level1, "RetryCount: \t%d\n", job.Status.RetryCount)
}
Expand Down
1 change: 1 addition & 0 deletions pkg/cli/job/view_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ func TestViewJob(t *testing.T) {
Pending: 3,
Running: 1,
Failed: 2,
Unknown: 10,
Terminating: 4,
RetryCount: 5,
MinAvailable: 6,
Expand Down
49 changes: 24 additions & 25 deletions pkg/controllers/job/job_controller_actions.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ func (cc *Controller) killJob(jobInfo *apis.JobInfo, podRetainPhase state.PhaseM
return nil
}

var pending, running, terminating, succeeded, failed int32
var pending, running, terminating, succeeded, failed, unknown int32

var errs []error
var total int
Expand Down Expand Up @@ -76,16 +76,7 @@ func (cc *Controller) killJob(jobInfo *apis.JobInfo, podRetainPhase state.PhaseM
cc.resyncTask(pod)
}

switch pod.Status.Phase {
case v1.PodRunning:
running++
case v1.PodPending:
pending++
case v1.PodSucceeded:
succeeded++
case v1.PodFailed:
failed++
}
classifyAndAddUpPodBaseOnPhase(pod, &pending, &running, &succeeded, &failed, &unknown)
}
}

Expand All @@ -108,6 +99,7 @@ func (cc *Controller) killJob(jobInfo *apis.JobInfo, podRetainPhase state.PhaseM
Succeeded: succeeded,
Failed: failed,
Terminating: terminating,
Unknown: unknown,
Version: job.Status.Version,
MinAvailable: int32(job.Spec.MinAvailable),
RetryCount: job.Status.RetryCount,
Expand Down Expand Up @@ -217,7 +209,7 @@ func (cc *Controller) syncJob(jobInfo *apis.JobInfo, updateStatus state.UpdateSt
return nil
}

var running, pending, terminating, succeeded, failed int32
var running, pending, terminating, succeeded, failed, unknown int32

var podToCreate []*v1.Pod
var podToDelete []*v1.Pod
Expand Down Expand Up @@ -250,16 +242,7 @@ func (cc *Controller) syncJob(jobInfo *apis.JobInfo, updateStatus state.UpdateSt
continue
}

switch pod.Status.Phase {
case v1.PodPending:
pending++
case v1.PodRunning:
running++
case v1.PodSucceeded:
succeeded++
case v1.PodFailed:
failed++
}
classifyAndAddUpPodBaseOnPhase(pod, &pending, &running, &succeeded, &failed, &unknown)
}
}

Expand All @@ -273,7 +256,7 @@ func (cc *Controller) syncJob(jobInfo *apis.JobInfo, updateStatus state.UpdateSt
for _, pod := range podToCreate {
go func(pod *v1.Pod) {
defer waitCreationGroup.Done()
_, err := cc.kubeClients.CoreV1().Pods(pod.Namespace).Create(pod)
newPod, err := cc.kubeClients.CoreV1().Pods(pod.Namespace).Create(pod)
if err != nil && !apierrors.IsAlreadyExists(err) {
// Failed to create Pod, waitCreationGroup a moment and then create it again
// This is to ensure all podsMap under the same Job created
Expand All @@ -286,8 +269,7 @@ func (cc *Controller) syncJob(jobInfo *apis.JobInfo, updateStatus state.UpdateSt
cc.resyncTask(pod)
}

// TODO: maybe not pending status, maybe unknown.
pending++
classifyAndAddUpPodBaseOnPhase(newPod, &pending, &running, &succeeded, &failed, &unknown)
glog.V(3).Infof("Created Task <%s> of Job <%s/%s>",
pod.Name, job.Namespace, job.Name)
}
Expand Down Expand Up @@ -340,6 +322,7 @@ func (cc *Controller) syncJob(jobInfo *apis.JobInfo, updateStatus state.UpdateSt
Succeeded: succeeded,
Failed: failed,
Terminating: terminating,
Unknown: unknown,
Version: job.Status.Version,
MinAvailable: int32(job.Spec.MinAvailable),
ControlledResources: job.Status.ControlledResources,
Expand Down Expand Up @@ -559,3 +542,19 @@ func (cc *Controller) initJobStatus(job *vkv1.Job) (*vkv1.Job, error) {

return newJob, nil
}

func classifyAndAddUpPodBaseOnPhase(pod *v1.Pod, pending, running, succeeded, failed, unknown *int32) {
switch pod.Status.Phase {
case v1.PodPending:
*pending++
case v1.PodRunning:
*running++
case v1.PodSucceeded:
*succeeded++
case v1.PodFailed:
*failed++
default:
*unknown++
}
return
}