Adds deployment configuration for extproc #98

Merged (5 commits, Jan 16, 2025)
51 changes: 50 additions & 1 deletion api/v1alpha1/api.go
@@ -2,6 +2,7 @@ package v1alpha1

import (
egv1a1 "github.com/envoyproxy/gateway/api/v1alpha1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
gwapiv1 "sigs.k8s.io/gateway-api/apis/v1"
gwapiv1a2 "sigs.k8s.io/gateway-api/apis/v1alpha2"
@@ -57,7 +58,8 @@ type LLMRouteSpec struct {
// Each rule is a subset of the HTTPRoute in the Gateway API (https://gateway-api.sigs.k8s.io/api-types/httproute/).
//
// AI Gateway controller will generate a HTTPRoute based on the configuration given here with the additional
// modifications to achieve the necessary jobs, notably inserting the AI Gateway external processor filter.
// modifications to achieve the necessary jobs, notably inserting the AI Gateway filter responsible for
// the transformation of the request and response, etc.
//
// In the matching conditions in the LLMRouteRule, `x-envoy-ai-gateway-model` header is available
// if we want to describe the routing behavior based on the model name. The model name is extracted
@@ -69,6 +71,14 @@ type LLMRouteSpec struct {
// +kubebuilder:validation:Required
// +kubebuilder:validation:MaxItems=128
Rules []LLMRouteRule `json:"rules"`
// FilterConfig is the configuration for the AI Gateway filter inserted in the generated HTTPRoute.
//
// An AI Gateway filter is responsible for the transformation of the request and response
// as well as the routing behavior based on the model name extracted from the request content, etc.
//
// Currently, the filter is only implemented as an external process filter, which might be
// extended to other types of filters in the future. See https://github.com/envoyproxy/ai-gateway/issues/90
FilterConfig *LLMRouteFilterConfig `json:"filterConfig,omitempty"`
}

// LLMRouteRule is a rule that defines the routing behavior of the LLMRoute.
@@ -122,6 +132,45 @@ type LLMRouteRuleMatch struct {
Headers []gwapiv1.HTTPHeaderMatch `json:"headers,omitempty"`
}

type LLMRouteFilterConfig struct {
Member Author: This indirection will help us deprecate and keep the API clean before/after extproc removal per #90. FYI, I will work on the compatibility policy doc next.

Member Author: Also, this could be AIGatewayFilterConfig per #76.

Contributor: Right, let's change it all together.

// Type specifies the type of the filter configuration.
//
// Currently, only ExternalProcess is supported, and default is ExternalProcess.
//
// +kubebuilder:default=ExternalProcess
Type LLMRouteFilterConfigType `json:"type"`

// ExternalProcess is the configuration for the external process filter.
// This is optional, and if not set, the default values of Deployment spec will be used.
//
// +optional
ExternalProcess *LLMRouteFilterConfigExternalProcess `json:"externalProcess,omitempty"`
}

// LLMRouteFilterConfigType specifies the type of the filter configuration.
//
// +kubebuilder:validation:Enum=ExternalProcess;DynamicModule
type LLMRouteFilterConfigType string

const (
LLMRouteFilterConfigTypeExternalProcess LLMRouteFilterConfigType = "ExternalProcess"
LLMRouteFilterConfigTypeDynamicModule LLMRouteFilterConfigType = "DynamicModule" // Reserved for https://github.com/envoyproxy/ai-gateway/issues/90
)

type LLMRouteFilterConfigExternalProcess struct {
// Replicas is the number of desired pods of the external process deployment.
//
// +optional
Replicas *int32 `json:"replicas,omitempty"`
// Resources required by the external process container.
// More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
//
// +optional
Resources *corev1.ResourceRequirements `json:"resources,omitempty"`
Comment on lines +161 to +169 — Member Author: I could've used either the embedding of appsv1.DeploymentSpec or EG's KubernetesDeploymentSpec, but both seem too complex for this purpose in addition to the implementation cost. We can add additional fields on demand.
// TODO: maybe adding the option not to deploy the external process filter and let the user deploy it manually?
// Not sure if it is worth it as we are migrating to dynamic modules.
}

// +kubebuilder:object:root=true

// LLMBackend is a resource that represents a single backend for LLMRoute.
51 changes: 51 additions & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default.

22 changes: 19 additions & 3 deletions internal/controller/llmroute.go
@@ -213,6 +213,7 @@ func (c *llmRouteController) reconcileExtProcDeployment(ctx context.Context, llm
},
},
}
extProcDeploymentUpdate(&deployment.Spec, llmRoute.Spec.FilterConfig)
_, err = c.kube.AppsV1().Deployments(llmRoute.Namespace).Create(ctx, deployment, metav1.CreateOptions{})
if err != nil {
return fmt.Errorf("failed to create deployment: %w", err)
@@ -221,11 +222,13 @@
} else {
return fmt.Errorf("failed to get deployment: %w", err)
}
} else {
extProcDeploymentUpdate(&deployment.Spec, llmRoute.Spec.FilterConfig)
if _, err = c.kube.AppsV1().Deployments(llmRoute.Namespace).Update(ctx, deployment, metav1.UpdateOptions{}); err != nil {
return fmt.Errorf("failed to update deployment: %w", err)
}
}

// TODO: reconcile the deployment spec like replicas etc once we have support for it at the CRD level.
_ = deployment

// This is static, so we don't need to update it.
service := &corev1.Service{
ObjectMeta: metav1.ObjectMeta{
@@ -264,3 +267,16 @@ func ownerReferenceForLLMRoute(llmRoute *aigv1a1.LLMRoute) []metav1.OwnerReferen
UID: llmRoute.UID,
}}
}

func extProcDeploymentUpdate(d *appsv1.DeploymentSpec, filterConfig *aigv1a1.LLMRouteFilterConfig) {
if filterConfig == nil || filterConfig.ExternalProcess == nil {
return
}
extProc := filterConfig.ExternalProcess
if resource := extProc.Resources; resource != nil {
d.Template.Spec.Containers[0].Resources = *resource
}
if replica := extProc.Replicas; replica != nil {
d.Replicas = replica
}
}
37 changes: 34 additions & 3 deletions internal/controller/llmroute_test.go
@@ -6,8 +6,11 @@ import (

egv1a1 "github.com/envoyproxy/gateway/api/v1alpha1"
"github.com/stretchr/testify/require"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
fake2 "k8s.io/client-go/kubernetes/fake"
"k8s.io/utils/ptr"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
gwapiv1a2 "sigs.k8s.io/gateway-api/apis/v1alpha2"
@@ -52,22 +55,50 @@ func TestLLMRouteController_reconcileExtProcDeployment(t *testing.T) {
c.kube = fake2.NewClientset()

ownerRef := []metav1.OwnerReference{{APIVersion: "v1", Kind: "Kind", Name: "Name"}}
llmRoute := &aigv1a1.LLMRoute{ObjectMeta: metav1.ObjectMeta{Name: "myroute", Namespace: "default"}}
llmRoute := &aigv1a1.LLMRoute{
ObjectMeta: metav1.ObjectMeta{Name: "myroute", Namespace: "default"},
Spec: aigv1a1.LLMRouteSpec{
FilterConfig: &aigv1a1.LLMRouteFilterConfig{
Type: aigv1a1.LLMRouteFilterConfigTypeExternalProcess,
ExternalProcess: &aigv1a1.LLMRouteFilterConfigExternalProcess{
Replicas: ptr.To[int32](123),
Resources: &corev1.ResourceRequirements{
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("200m"),
corev1.ResourceMemory: resource.MustParse("100Mi"),
},
},
},
},
},
}

err := c.reconcileExtProcDeployment(context.Background(), llmRoute, ownerRef)
require.NoError(t, err)

deployment, err := c.kube.AppsV1().Deployments("default").Get(context.Background(), extProcName(llmRoute), metav1.GetOptions{})
require.NoError(t, err)
require.Equal(t, extProcName(llmRoute), deployment.Name)

require.Equal(t, int32(123), *deployment.Spec.Replicas)
require.Equal(t, ownerRef, deployment.OwnerReferences)
require.Equal(t, corev1.ResourceRequirements{
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("200m"),
corev1.ResourceMemory: resource.MustParse("100Mi"),
},
}, deployment.Spec.Template.Spec.Containers[0].Resources)
service, err := c.kube.CoreV1().Services("default").Get(context.Background(), extProcName(llmRoute), metav1.GetOptions{})
require.NoError(t, err)
require.Equal(t, extProcName(llmRoute), service.Name)

// Doing it again should not fail.
// Doing it again should not fail and update the deployment.
llmRoute.Spec.FilterConfig.ExternalProcess.Replicas = ptr.To[int32](456)
err = c.reconcileExtProcDeployment(context.Background(), llmRoute, ownerRef)
require.NoError(t, err)
// Check the deployment is updated.
deployment, err = c.kube.AppsV1().Deployments("default").Get(context.Background(), extProcName(llmRoute), metav1.GetOptions{})
require.NoError(t, err)
require.Equal(t, int32(456), *deployment.Spec.Replicas)
}

func TestLLMRouteController_reconcileExtProcExtensionPolicy(t *testing.T) {
@@ -51,6 +51,101 @@ spec:
spec:
description: Spec defines the details of the LLM policy.
properties:
filterConfig:
description: |-
FilterConfig is the configuration for the AI Gateway filter inserted in the generated HTTPRoute.

An AI Gateway filter is responsible for the transformation of the request and response
as well as the routing behavior based on the model name extracted from the request content, etc.

Currently, the filter is only implemented as an external process filter, which might be
extended to other types of filters in the future. See https://github.com/envoyproxy/ai-gateway/issues/90
properties:
externalProcess:
description: |-
ExternalProcess is the configuration for the external process filter.
This is optional, and if not set, the default values of Deployment spec will be used.
properties:
replicas:
description: Replicas is the number of desired pods of the
external process deployment.
format: int32
type: integer
resources:
description: |-
Resources required by the external process container.
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
properties:
claims:
description: |-
Claims lists the names of resources, defined in spec.resourceClaims,
that are used by this container.

This is an alpha field and requires enabling the
DynamicResourceAllocation feature gate.

This field is immutable. It can only be set for containers.
items:
description: ResourceClaim references one entry in PodSpec.ResourceClaims.
properties:
name:
description: |-
Name must match the name of one entry in pod.spec.resourceClaims of
the Pod where this field is used. It makes that resource available
inside a container.
type: string
request:
description: |-
Request is the name chosen for a request in the referenced claim.
If empty, everything from the claim is made available, otherwise
only the result of this request.
type: string
required:
- name
type: object
type: array
x-kubernetes-list-map-keys:
- name
x-kubernetes-list-type: map
limits:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: |-
Limits describes the maximum amount of compute resources allowed.
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
type: object
requests:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: |-
Requests describes the minimum amount of compute resources required.
If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
otherwise to an implementation-defined value. Requests cannot exceed Limits.
More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
type: object
type: object
type: object
type:
default: ExternalProcess
description: |-
Type specifies the type of the filter configuration.

Currently, only ExternalProcess is supported, and default is ExternalProcess.
enum:
- ExternalProcess
- DynamicModule
type: string
required:
- type
type: object
inputSchema:
description: |-
APISchema specifies the API schema of the input that the target Gateway(s) will receive.
@@ -79,7 +174,8 @@ spec:
Each rule is a subset of the HTTPRoute in the Gateway API (https://gateway-api.sigs.k8s.io/api-types/httproute/).

AI Gateway controller will generate a HTTPRoute based on the configuration given here with the additional
modifications to achieve the necessary jobs, notably inserting the AI Gateway external processor filter.
modifications to achieve the necessary jobs, notably inserting the AI Gateway filter responsible for
the transformation of the request and response, etc.

In the matching conditions in the LLMRouteRule, `x-envoy-ai-gateway-model` header is available
if we want to describe the routing behavior based on the model name. The model name is extracted