Skip to content

Commit

Permalink
Fix Alluxio master startup error in HA mode. (#3658)
Browse files Browse the repository at this point in the history
* fix alluxio ha master start error

Signed-off-by: xliuqq <[email protected]>

* move log inside the HA check block

Signed-off-by: xliuqq <[email protected]>

* fix alluxio ha master unit test

Signed-off-by: xliuqq <[email protected]>

---------

Signed-off-by: xliuqq <[email protected]>
  • Loading branch information
xliuqq authored Dec 21, 2023
1 parent 69539a3 commit bbc940e
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 2 deletions.
3 changes: 2 additions & 1 deletion charts/alluxio/templates/master/statefulset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,8 @@ spec:
args:
{{ toYaml .Values.master.args | trim | indent 12 }}
{{- end }}
{{- if eq .Values.master.mountConfigStorage "configmap"}}
# In HA mode the masters must connect to each other before they become ready, so the startup probe cannot be used to mount; the controller performs the mount instead.
{{- if and (eq .Values.master.mountConfigStorage "configmap") ($isSingleMaster) }}
{{- if semverCompare ">=1.18.0-0" .Capabilities.KubeVersion.Version }}
startupProbe:
exec:
Expand Down
19 changes: 19 additions & 0 deletions pkg/ddc/alluxio/ufs.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package alluxio
import (
"fmt"
datav1alpha1 "github.com/fluid-cloudnative/fluid/api/v1alpha1"
"github.com/fluid-cloudnative/fluid/pkg/ddc/alluxio/operations"
"github.com/fluid-cloudnative/fluid/pkg/utils"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"os"
Expand Down Expand Up @@ -82,7 +83,25 @@ func (e *AlluxioEngine) PrepareUFS() (err error) {
}
}
e.Log.Info("mountUFS")
} else {
// With multiple masters, the startup probe/postStart hook cannot be used, so the mount is performed here in the controller.
runtime, err := e.getRuntime()
if err != nil {
return err
}
replicas := runtime.Spec.Master.Replicas
if replicas > 1 {
// Mount UFS (Synchronous Operation)
podName, containerName := e.getMasterPodInfo()
fileUtils := operations.NewAlluxioFileUtils(podName, containerName, e.namespace, e.Log)
err = fileUtils.ExecMountScripts()
if err != nil {
return err
}
e.Log.Info("mountUFS for ha master")
}
}

err = e.SyncMetadata()
if err != nil {
// just report this error and ignore it because SyncMetadata isn't on the critical path of Setup
Expand Down
65 changes: 64 additions & 1 deletion pkg/ddc/alluxio/ufs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,65 @@ func TestPrepareUFS(t *testing.T) {
{
name: "test",
fields: fields{
runtime: &datav1alpha1.AlluxioRuntime{},
runtime: &datav1alpha1.AlluxioRuntime{
ObjectMeta: v1.ObjectMeta{
Name: "spark",
Namespace: "default",
},
},
master: &appsv1.StatefulSet{
ObjectMeta: v1.ObjectMeta{
Name: "hbase-master",
Namespace: "fluid",
},
Spec: appsv1.StatefulSetSpec{
Replicas: utilpointer.Int32(2),
},
Status: appsv1.StatefulSetStatus{
Replicas: 3,
ReadyReplicas: 2,
},
},
dataset: &datav1alpha1.Dataset{
ObjectMeta: v1.ObjectMeta{
Name: "spark",
Namespace: "default",
},
Spec: datav1alpha1.DatasetSpec{
Mounts: []datav1alpha1.Mount{
{
MountPoint: "cosn://imagenet-1234567/",
},
},
DataRestoreLocation: &datav1alpha1.DataRestoreLocation{
Path: "local:///tmp/restore",
NodeName: "192.168.0.1",
},
},
Status: datav1alpha1.DatasetStatus{
UfsTotal: "",
},
},
name: "spark",
namespace: "default",
Log: fake.NullLogger(),
},
wantErr: false,
},
{
name: "ha master",
fields: fields{
runtime: &datav1alpha1.AlluxioRuntime{
ObjectMeta: v1.ObjectMeta{
Name: "spark",
Namespace: "default",
},
Spec: datav1alpha1.AlluxioRuntimeSpec{
Master: datav1alpha1.AlluxioCompTemplateSpec{
Replicas: 3,
},
},
},
master: &appsv1.StatefulSet{
ObjectMeta: v1.ObjectMeta{
Name: "hbase-master",
Expand Down Expand Up @@ -321,6 +379,11 @@ func TestPrepareUFS(t *testing.T) {
})
defer patch4.Reset()

patch5 := ApplyMethod(reflect.TypeOf(afsUtils), "ExecMountScripts", func(_ operations.AlluxioFileUtils) error {
return nil
})
defer patch5.Reset()

e := &AlluxioEngine{
runtime: tt.fields.runtime,
name: tt.fields.name,
Expand Down

0 comments on commit bbc940e

Please sign in to comment.