From f2a9b34896041704633d978fb87e60c029ae3634 Mon Sep 17 00:00:00 2001
From: BenjaminBraunDev <benjaminbraun@google.com>
Date: Thu, 10 Apr 2025 17:12:05 +0000
Subject: [PATCH] Rename resources to be model-server-generic instead of
 referencing vLLM

---
 .../gateway/gke/gcp-backend-policy.yaml       |  2 +-
 config/manifests/gateway/gke/healthcheck.yaml |  2 +-
 config/manifests/gateway/gke/httproute.yaml   |  2 +-
 config/manifests/inferencemodel.yaml          |  6 +++---
 config/manifests/inferencepool-resources.yaml | 20 +++++++++----------
 5 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/config/manifests/gateway/gke/gcp-backend-policy.yaml b/config/manifests/gateway/gke/gcp-backend-policy.yaml
index 7b294304e..f5cc0642d 100644
--- a/config/manifests/gateway/gke/gcp-backend-policy.yaml
+++ b/config/manifests/gateway/gke/gcp-backend-policy.yaml
@@ -6,7 +6,7 @@ spec:
   targetRef:
     group: "inference.networking.x-k8s.io"
     kind: InferencePool
-    name: vllm-llama3-8b-instruct
+    name: llama3-8b-instruct
   default:
     timeoutSec: 300
     logging:
diff --git a/config/manifests/gateway/gke/healthcheck.yaml b/config/manifests/gateway/gke/healthcheck.yaml
index 93b6cd7fa..161e58dea 100644
--- a/config/manifests/gateway/gke/healthcheck.yaml
+++ b/config/manifests/gateway/gke/healthcheck.yaml
@@ -7,7 +7,7 @@ spec:
   targetRef:
     group: "inference.networking.x-k8s.io"
     kind: InferencePool
-    name: vllm-llama3-8b-instruct
+    name: llama3-8b-instruct
   default:
     config:
       type: HTTP
diff --git a/config/manifests/gateway/gke/httproute.yaml b/config/manifests/gateway/gke/httproute.yaml
index 6ea90891c..111f78eda 100644
--- a/config/manifests/gateway/gke/httproute.yaml
+++ b/config/manifests/gateway/gke/httproute.yaml
@@ -11,7 +11,7 @@ spec:
   - backendRefs:
     - group: inference.networking.x-k8s.io
       kind: InferencePool
-      name: vllm-llama3-8b-instruct
+      name: llama3-8b-instruct
     matches:
     - path:
         type: PathPrefix
diff --git a/config/manifests/inferencemodel.yaml b/config/manifests/inferencemodel.yaml
index 67c91d0e5..431105182 100644
--- a/config/manifests/inferencemodel.yaml
+++ b/config/manifests/inferencemodel.yaml
@@ -6,7 +6,7 @@ spec:
   modelName: food-review
   criticality: Standard
   poolRef:
-    name: vllm-llama3-8b-instruct
+    name: llama3-8b-instruct
   targetModels:
   - name: food-review-1
     weight: 100
@@ -19,7 +19,7 @@ spec:
   modelName: meta-llama/Llama-3.1-8B-Instruct
   criticality: Critical
   poolRef:
-    name: vllm-llama3-8b-instruct
+    name: llama3-8b-instruct
 ---
 apiVersion: inference.networking.x-k8s.io/v1alpha2
 kind: InferenceModel
@@ -29,4 +29,4 @@ spec:
   modelName: Qwen/Qwen2.5-1.5B-Instruct
   criticality: Critical
   poolRef:
-    name: vllm-llama3-8b-instruct
+    name: llama3-8b-instruct
diff --git a/config/manifests/inferencepool-resources.yaml b/config/manifests/inferencepool-resources.yaml
index 993b7bf62..c16a0fc39 100644
--- a/config/manifests/inferencepool-resources.yaml
+++ b/config/manifests/inferencepool-resources.yaml
@@ -5,22 +5,22 @@ apiVersion: inference.networking.x-k8s.io/v1alpha2
 kind: InferencePool
 metadata:
   labels:
-  name: vllm-llama3-8b-instruct
+  name: llama3-8b-instruct
 spec:
   targetPortNumber: 8000
   selector:
-    app: vllm-llama3-8b-instruct
+    app: vllm-llama3-8b-instruct  # Selects the model server Pods by label; change this to target a different model server Deployment
   extensionRef:
-    name: vllm-llama3-8b-instruct-epp
+    name: llama3-8b-instruct-epp
 ---
 apiVersion: v1
 kind: Service
 metadata:
-  name: vllm-llama3-8b-instruct-epp
+  name: llama3-8b-instruct-epp
   namespace: default
 spec:
   selector:
-    app: vllm-llama3-8b-instruct-epp
+    app: llama3-8b-instruct-epp
   ports:
     - protocol: TCP
       port: 9002
@@ -31,19 +31,19 @@ spec:
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: vllm-llama3-8b-instruct-epp
+  name: llama3-8b-instruct-epp
   namespace: default
   labels:
-    app: vllm-llama3-8b-instruct-epp
+    app: llama3-8b-instruct-epp
 spec:
   replicas: 1
   selector:
     matchLabels:
-      app: vllm-llama3-8b-instruct-epp
+      app: llama3-8b-instruct-epp
   template:
     metadata:
       labels:
-        app: vllm-llama3-8b-instruct-epp
+        app: llama3-8b-instruct-epp
     spec:
       # Conservatively, this timeout should mirror the longest grace period of the pods within the pool
       terminationGracePeriodSeconds: 130
@@ -53,7 +53,7 @@ spec:
         imagePullPolicy: Always
         args:
         - -poolName
-        - "vllm-llama3-8b-instruct"
+        - "llama3-8b-instruct"
         - -v
         - "4"
         - --zap-encoder