From 13d117bebb9466a9582cd4346ebe0b85ce000ab6 Mon Sep 17 00:00:00 2001 From: Artur Troian Date: Sat, 9 Dec 2023 09:42:24 -0500 Subject: [PATCH] feat(sdl): parse amd gpu refs akash-network/support#142 Signed-off-by: Artur Troian --- sdl/gpu.go | 65 ++++++---- sdl/gpu_test.go | 313 +++++++++++++++++++++++++++++++++--------------- sdl/units.go | 6 +- 3 files changed, 264 insertions(+), 120 deletions(-) diff --git a/sdl/gpu.go b/sdl/gpu.go index 1bb6b12556..fecfbcec20 100644 --- a/sdl/gpu.go +++ b/sdl/gpu.go @@ -1,6 +1,7 @@ package sdl import ( + "errors" "fmt" "sort" @@ -9,25 +10,28 @@ import ( types "github.com/akash-network/akash-api/go/node/types/v1beta3" ) -type v2GPUNvidia struct { +var ( + ErrResourceGPUEmptyVendors = errors.New("sdl: invalid GPU attributes. at least one vendor must be set") +) + +type v2GPU struct { Model string `yaml:"model"` RAM *memoryQuantity `yaml:"ram,omitempty"` } -func (sdl *v2GPUNvidia) String() string { +func (sdl *v2GPU) String() string { key := sdl.Model if sdl.RAM != nil { - key += "/" + sdl.RAM.StringWithSuffix("Gi") + key += "/ram/" + sdl.RAM.StringWithSuffix("Gi") } return key } -type v2GPUsNvidia []v2GPUNvidia +type v2GPUs []v2GPU -type gpuVendor struct { - Nvidia v2GPUsNvidia `yaml:"nvidia,omitempty"` -} +type gpuVendors map[string]v2GPUs +type gpuVendorAttributes map[string]types.Attributes type v2GPUAttributes types.Attributes @@ -66,12 +70,12 @@ func (sdl *v2ResourceGPU) UnmarshalYAML(node *yaml.Node) error { func (sdl *v2GPUAttributes) UnmarshalYAML(node *yaml.Node) error { var res types.Attributes - var vendor *gpuVendor + vendors := make(gpuVendors) for i := 0; i < len(node.Content); i += 2 { switch node.Content[i].Value { case "vendor": - if err := node.Content[i+1].Decode(&vendor); err != nil { + if err := node.Content[i+1].Decode(&vendors); err != nil { return err } default: @@ -79,24 +83,41 @@ func (sdl *v2GPUAttributes) UnmarshalYAML(node *yaml.Node) error { } } - if vendor == nil { - return fmt.Errorf("sdl: invalid 
GPU attributes. at least one vendor must be set") + if len(vendors) == 0 { + return ErrResourceGPUEmptyVendors } - res = make(types.Attributes, 0, len(vendor.Nvidia)) + resPrealloc := 0 - for _, model := range vendor.Nvidia { - res = append(res, types.Attribute{ - Key: fmt.Sprintf("vendor/nvidia/model/%s", model.String()), - Value: "true", - }) + for _, models := range vendors { + if len(models) == 0 { + resPrealloc += 1 + } else { + resPrealloc += len(models) + } } - if len(res) == 0 { - res = append(res, types.Attribute{ - Key: "vendor/nvidia/model/*", - Value: "true", - }) + for vendor, models := range vendors { + switch vendor { + case "nvidia": + case "amd": + default: + return fmt.Errorf("sdl: unsupported GPU vendor (%s)", vendor) + } + + for _, model := range models { + res = append(res, types.Attribute{ + Key: fmt.Sprintf("vendor/%s/model/%s", vendor, model.String()), + Value: "true", + }) + } + + if len(models) == 0 { + res = append(res, types.Attribute{ + Key: fmt.Sprintf("vendor/%s/model/*", vendor), + Value: "true", + }) + } } sort.Sort(res) diff --git a/sdl/gpu_test.go b/sdl/gpu_test.go index c409f0820d..4942bc2fb7 100644 --- a/sdl/gpu_test.go +++ b/sdl/gpu_test.go @@ -3,93 +3,155 @@ package sdl import ( "testing" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "gopkg.in/yaml.v3" ) +type testGpuAttributes map[string]string +type testGpuResource struct { + units gpuQuantity + attr testGpuAttributes +} + +type gpuTestCase struct { + name string + sdl string + expResource testGpuResource +} + func TestV2ResourceGPU_EmptyVendor(t *testing.T) { - var stream = ` + tests := []gpuTestCase{ + { + + name: "missing-vendor", + sdl: ` units: 1 attributes: vendor: -` - var p v2ResourceGPU +`, + }, + } + + for _, test := range tests { + test := test + t.Run(test.name, func(t *testing.T) { + var p v2ResourceGPU - err := yaml.Unmarshal([]byte(stream), &p) - require.Error(t, err) + err := yaml.Unmarshal([]byte(test.sdl), &p) + 
assert.Error(t, err) + assert.EqualError(t, err, ErrResourceGPUEmptyVendors.Error()) + }) + } } -func TestV2ResourceGPU_Wildcard(t *testing.T) { - var stream = ` +func TestV2ResourceGPU_UnknownVendor(t *testing.T) { + tests := []gpuTestCase{ + { + + name: "unknown-vendor", + sdl: ` units: 1 attributes: vendor: - nvidia: -` - var p v2ResourceGPU - - err := yaml.Unmarshal([]byte(stream), &p) - require.NoError(t, err) - require.Equal(t, gpuQuantity(1), p.Units) - require.Equal(t, 1, len(p.Attributes)) - require.Equal(t, "vendor/nvidia/model/*", p.Attributes[0].Key) - require.Equal(t, "true", p.Attributes[0].Value) + foo: +`, + }, + } + + for _, test := range tests { + test := test + t.Run(test.name, func(t *testing.T) { + var p v2ResourceGPU + + err := yaml.Unmarshal([]byte(test.sdl), &p) + assert.Error(t, err) + assert.ErrorContains(t, err, "sdl: unsupported GPU vendor") + }) + } } -func TestV2ResourceGPU_SingleModel(t *testing.T) { - var stream = ` +func TestV2ResourceGPU_InvalidRAM(t *testing.T) { + tests := []gpuTestCase{ + { + + name: "invalid-ram", + sdl: ` units: 1 attributes: vendor: nvidia: - model: a100 -` - var p v2ResourceGPU - - err := yaml.Unmarshal([]byte(stream), &p) - require.NoError(t, err) - require.Equal(t, gpuQuantity(1), p.Units) - require.Equal(t, 1, len(p.Attributes)) - require.Equal(t, "vendor/nvidia/model/a100", p.Attributes[0].Key) - require.Equal(t, "true", p.Attributes[0].Value) + ram: 80G +`, + }, + } + + for _, test := range tests { + test := test + t.Run(test.name, func(t *testing.T) { + var p v2ResourceGPU + + err := yaml.Unmarshal([]byte(test.sdl), &p) + assert.Error(t, err) + assert.EqualError(t, err, errResourceMemoryInvalid.Error()) + }) + } } -func TestV2ResourceGPU_SingleModelWithRAM(t *testing.T) { - var stream = ` +func TestV2ResourceGPU(t *testing.T) { + tests := []gpuTestCase{ + { + name: "wildcard-nvidia", + sdl: ` +units: 1 +attributes: + vendor: + nvidia: +`, + expResource: testGpuResource{ + units: 1, + attr:
testGpuAttributes{ + "vendor/nvidia/model/*": "true", + }, + }, + }, + { + name: "single-model-nvidia", + sdl: ` units: 1 attributes: vendor: nvidia: - model: a100 - ram: 80Gi -` - var p v2ResourceGPU - - err := yaml.Unmarshal([]byte(stream), &p) - require.NoError(t, err) - require.Equal(t, gpuQuantity(1), p.Units) - require.Equal(t, 1, len(p.Attributes)) - require.Equal(t, "vendor/nvidia/model/a100/80Gi", p.Attributes[0].Key) - require.Equal(t, "true", p.Attributes[0].Value) -} - -func TestV2ResourceGPU_InvalidRAMUnit(t *testing.T) { - var stream = ` +`, + expResource: testGpuResource{ + units: 1, + attr: testGpuAttributes{ + "vendor/nvidia/model/a100": "true", + }, + }, + }, + { + name: "single-model-with-ram-nvidia", + sdl: ` units: 1 attributes: vendor: nvidia: - model: a100 - ram: 80G -` - var p v2ResourceGPU - - err := yaml.Unmarshal([]byte(stream), &p) - require.Error(t, err) -} - -func TestV2ResourceGPU_MultipleModels(t *testing.T) { - var stream = ` + ram: 80Gi +`, + expResource: testGpuResource{ + units: 1, + attr: testGpuAttributes{ + "vendor/nvidia/model/a100/ram/80Gi": "true", + }, + }, + }, + { + name: "multiple-models-with-ram-nvidia", + sdl: ` units: 1 attributes: vendor: @@ -98,21 +160,18 @@ attributes: ram: 80Gi - model: a100 ram: 40Gi -` - var p v2ResourceGPU - - err := yaml.Unmarshal([]byte(stream), &p) - require.NoError(t, err) - require.Equal(t, gpuQuantity(1), p.Units) - require.Equal(t, 2, len(p.Attributes)) - require.Equal(t, "vendor/nvidia/model/a100/40Gi", p.Attributes[0].Key) - require.Equal(t, "true", p.Attributes[0].Value) - require.Equal(t, "vendor/nvidia/model/a100/80Gi", p.Attributes[1].Key) - require.Equal(t, "true", p.Attributes[1].Value) -} - -func TestV2ResourceGPU_MultipleModels2(t *testing.T) { - var stream = ` +`, + expResource: testGpuResource{ + units: 1, + attr: testGpuAttributes{ + "vendor/nvidia/model/a100/ram/40Gi": "true", + "vendor/nvidia/model/a100/ram/80Gi": "true", + }, + }, + }, + { + name: 
"multiple-models-mix-nvidia", + sdl: ` units: 1 attributes: vendor: @@ -120,36 +179,98 @@ attributes: - model: a100 ram: 80Gi - model: a100 -` - var p v2ResourceGPU - - err := yaml.Unmarshal([]byte(stream), &p) - require.NoError(t, err) - require.Equal(t, gpuQuantity(1), p.Units) - require.Equal(t, 2, len(p.Attributes)) - require.Equal(t, "vendor/nvidia/model/a100", p.Attributes[0].Key) - require.Equal(t, "true", p.Attributes[0].Value) - require.Equal(t, "vendor/nvidia/model/a100/80Gi", p.Attributes[1].Key) - require.Equal(t, "true", p.Attributes[1].Value) -} - -func TestV2ResourceGPU_MultipleModels3(t *testing.T) { - var stream = ` +`, + expResource: testGpuResource{ + units: 1, + attr: testGpuAttributes{ + "vendor/nvidia/model/a100": "true", + "vendor/nvidia/model/a100/ram/80Gi": "true", + }, + }, + }, + { + name: "multiple-models-nvidia", + sdl: ` units: 1 attributes: vendor: nvidia: - - model: a6000 + - model: a100 - model: a40 -` - var p v2ResourceGPU - - err := yaml.Unmarshal([]byte(stream), &p) - require.NoError(t, err) - require.Equal(t, gpuQuantity(1), p.Units) - require.Equal(t, 2, len(p.Attributes)) - require.Equal(t, "vendor/nvidia/model/a40", p.Attributes[0].Key) - require.Equal(t, "true", p.Attributes[0].Value) - require.Equal(t, "vendor/nvidia/model/a6000", p.Attributes[1].Key) - require.Equal(t, "true", p.Attributes[1].Value) +`, + expResource: testGpuResource{ + units: 1, + attr: testGpuAttributes{ + "vendor/nvidia/model/a40": "true", + "vendor/nvidia/model/a100": "true", + }, + }, + }, + { + name: "multiple-vendors-wildcard", + sdl: ` +units: 1 +attributes: + vendor: + nvidia: + amd: +`, + expResource: testGpuResource{ + units: 1, + attr: testGpuAttributes{ + "vendor/nvidia/model/*": "true", + "vendor/amd/model/*": "true", + }, + }, + }, + { + name: "wildcard-amd", + sdl: ` +units: 1 +attributes: + vendor: + amd: +`, + expResource: testGpuResource{ + units: 1, + attr: testGpuAttributes{ + "vendor/amd/model/*": "true", + }, + }, + }, + { + name: 
"single-model-amd", + sdl: ` +units: 1 +attributes: + vendor: + amd: + - model: mi250 +`, + expResource: testGpuResource{ + units: 1, + attr: testGpuAttributes{ + "vendor/amd/model/mi250": "true", + }, + }, + }, + } + + for idx := range tests { + tc := tests[idx] + t.Run(tc.name, func(t *testing.T) { + var p v2ResourceGPU + + err := yaml.Unmarshal([]byte(tc.sdl), &p) + require.NoError(t, err) + + assert.Equal(t, tc.expResource.units, p.Units) + assert.Equal(t, len(tc.expResource.attr), len(p.Attributes)) + + for i := range p.Attributes { + assert.Contains(t, tc.expResource.attr, p.Attributes[i].Key) + assert.Equal(t, tc.expResource.attr[p.Attributes[i].Key], p.Attributes[i].Value) + } + }) + } } diff --git a/sdl/units.go b/sdl/units.go index d158e44214..f34d159a81 100644 --- a/sdl/units.go +++ b/sdl/units.go @@ -1,6 +1,7 @@ package sdl import ( + "errors" "fmt" "strconv" "strings" @@ -11,7 +12,8 @@ import ( ) var ( - errNegativeValue = fmt.Errorf("invalid: negative value not allowed") + errNegativeValue = errors.New("sdl: negative value not allowed") + errResourceMemoryInvalid = errors.New("sdl: invalid memory quantity") ) var unitSuffixes = map[string]uint64{ @@ -100,7 +102,7 @@ func (u *byteQuantity) UnmarshalYAML(node *yaml.Node) error { func (u *memoryQuantity) UnmarshalYAML(node *yaml.Node) error { val, err := parseWithSuffix(node.Value, memorySuffixes) if err != nil { - return err + return errResourceMemoryInvalid } *u = memoryQuantity(val) return nil