mirror of
https://github.com/fauxpilot/fauxpilot.git
synced 2025-03-12 04:36:10 -07:00
Some minor ergonomic changes for python backend
- Add validation rule to ensure `model` is set to fastertransformer or python-backend. - Add warning if the model is unavailable — likely the user has not set `model` correctly. Signed-off-by: Parth Thakkar <thakkarparth007@gmail.com>
This commit is contained in:
parent
8df5058c5c
commit
4bf40cdb6c
@@ -1,10 +1,10 @@
|
||||
from typing import Optional, Union
|
||||
|
||||
from pydantic import BaseModel
|
||||
from pydantic import BaseModel, constr
|
||||
|
||||
|
||||
class OpenAIinput(BaseModel):
|
||||
model: str = "fastertransformer"
|
||||
model: constr(regex="^(fastertransformer|py-model)$") = "fastertransformer"
|
||||
prompt: Optional[str]
|
||||
suffix: Optional[str]
|
||||
max_tokens: Optional[int] = 16
|
||||
|
@@ -97,7 +97,7 @@ class CodeGenProxy:
|
||||
output_len = np.ones_like(input_len).astype(np_type) * max_tokens
|
||||
num_logprobs = data.get('logprobs', -1)
|
||||
if num_logprobs is None:
|
||||
num_logprobs = 1
|
||||
num_logprobs = -1
|
||||
want_logprobs = num_logprobs > 0
|
||||
|
||||
temperature = data.get('temperature', 0.2)
|
||||
@@ -246,8 +246,15 @@ class CodeGenProxy:
|
||||
st = time.time()
|
||||
try:
|
||||
completion, choices = self.generate(data)
|
||||
except InferenceServerException as E:
|
||||
print(E)
|
||||
except InferenceServerException as exc:
|
||||
# status: unavailable -- this happens if the `model` string is invalid
|
||||
print(exc)
|
||||
if exc.status() == 'StatusCode.UNAVAILABLE':
|
||||
print(
|
||||
f"WARNING: Model '{data['model']}' is not available. Please ensure that "
|
||||
"`model` is set to either 'fastertransformer' or 'py-model' depending on "
|
||||
"your installation"
|
||||
)
|
||||
completion = {}
|
||||
choices = []
|
||||
ed = time.time()
|
||||
|
@@ -78,13 +78,13 @@ def load_test_env():
|
||||
return env
|
||||
|
||||
def run_inference(
|
||||
prompt: str, model_name: str = "py-model", port: int = 5000, return_all: bool = False,
|
||||
prompt: str, model: str = "py-model", port: int = 5000, return_all: bool = False,
|
||||
**kwargs
|
||||
) -> Union[str, Dict]:
|
||||
"Invokes the copilot proxy with the given prompt and returns the completion"
|
||||
endpoint = f"http://localhost:{port}/v1/engines/codegen/completions"
|
||||
data = {
|
||||
"model": model_name,
|
||||
"model": model,
|
||||
"prompt": prompt,
|
||||
"suffix": kwargs.get("suffix", ""),
|
||||
"max_tokens": kwargs.get("max_tokens", 16),
|
||||
|
Loading…
x
Reference in New Issue
Block a user