mirror of
https://github.com/fauxpilot/fauxpilot.git
synced 2025-03-12 04:36:10 -07:00
Some minor ergonomic changes for python backend
- Add validation rule to ensure `model` is set to fastertransformer or python-backend. - Add warning if the model is unavailable — likely the user has not set `model` correctly. Signed-off-by: Parth Thakkar <thakkarparth007@gmail.com>
This commit is contained in:
parent
8df5058c5c
commit
4bf40cdb6c
@@ -1,10 +1,10 @@
|
||||
from typing import Optional, Union
|
||||
|
||||
from pydantic import BaseModel
|
||||
from pydantic import BaseModel, constr
|
||||
|
||||
|
||||
class OpenAIinput(BaseModel):
|
||||
model: str = "fastertransformer"
|
||||
model: constr(regex="^(fastertransformer|py-model)$") = "fastertransformer"
|
||||
prompt: Optional[str]
|
||||
suffix: Optional[str]
|
||||
max_tokens: Optional[int] = 16
|
||||
|
@@ -97,7 +97,7 @@ class CodeGenProxy:
|
||||
output_len = np.ones_like(input_len).astype(np_type) * max_tokens
|
||||
num_logprobs = data.get('logprobs', -1)
|
||||
if num_logprobs is None:
|
||||
num_logprobs = 1
|
||||
num_logprobs = -1
|
||||
want_logprobs = num_logprobs > 0
|
||||
|
||||
temperature = data.get('temperature', 0.2)
|
||||
@@ -246,8 +246,15 @@ class CodeGenProxy:
|
||||
st = time.time()
|
||||
try:
|
||||
completion, choices = self.generate(data)
|
||||
except InferenceServerException as E:
|
||||
print(E)
|
||||
except InferenceServerException as exc:
|
||||
# status: unavailable -- this happens if the `model` string is invalid
|
||||
print(exc)
|
||||
if exc.status() == 'StatusCode.UNAVAILABLE':
|
||||
print(
|
||||
f"WARNING: Model '{data['model']}' is not available. Please ensure that "
|
||||
"`model` is set to either 'fastertransformer' or 'py-model' depending on "
|
||||
"your installation"
|
||||
)
|
||||
completion = {}
|
||||
choices = []
|
||||
ed = time.time()
|
||||
|
@@ -78,13 +78,13 @@ def load_test_env():
|
||||
return env
|
||||
|
||||
def run_inference(
|
||||
prompt: str, model_name: str = "py-model", port: int = 5000, return_all: bool = False,
|
||||
prompt: str, model: str = "py-model", port: int = 5000, return_all: bool = False,
|
||||
**kwargs
|
||||
) -> Union[str, Dict]:
|
||||
"Invokes the copilot proxy with the given prompt and returns the completion"
|
||||
endpoint = f"http://localhost:{port}/v1/engines/codegen/completions"
|
||||
data = {
|
||||
"model": model_name,
|
||||
"model": model,
|
||||
"prompt": prompt,
|
||||
"suffix": kwargs.get("suffix", ""),
|
||||
"max_tokens": kwargs.get("max_tokens", 16),
|
||||
|
Loading…
x
Reference in New Issue
Block a user