fauxpilot/python_backend/config_template.pbtxt
Parth 7ea388fe19 Fix segfault issue
I'd been using dlpack for copying triton tensors to torch tensors,
which I did because it was advertised to perform zero copy transfers.
Turns out that only worked on my laptop, and didn't work on other machines.
IDK why. But for now, I'm just copying the tensors as triton<->numpy<->torch.
That works on the VM on which earlier code was segfaulting

Signed-off-by: Parth <thakkarparth007@gmail.com>
2022-11-19 18:32:50 +00:00

181 lines
3.0 KiB
Plaintext

# Triton Inference Server model configuration for a model served by the
# Python backend.
name: "py-model"
backend: "python"
# Largest batch Triton may form for this model. Because this value is non-zero,
# the dims of every input and output declared in this file exclude the batch
# dimension.
max_batch_size: 4
# Request inputs.
#
# - dims exclude the batch dimension (max_batch_size is non-zero).
# - `dims: [ 1 ]` combined with `reshape: { shape: [ ] }` means the client
#   sends one value per batch item and the model sees the tensor without the
#   trailing dimension of size 1.
# - `optional: true` inputs may be omitted from a request.
# - Entries tagged UNUSED are declared but, per the original annotations, not
#   consumed by the model; presumably they are kept so existing clients can
#   keep sending them -- TODO confirm against the model code.
input [
  {
    name: "input_ids"
    data_type: TYPE_INT32
    dims: [ -1 ]
  },
  {
    # UNUSED
    name: "start_id"
    data_type: TYPE_INT32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    # UNUSED
    name: "end_id"
    data_type: TYPE_INT32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    name: "input_lengths"
    data_type: TYPE_INT32
    dims: [ 1 ]
    reshape: { shape: [ ] }
  },
  {
    name: "request_output_len"
    data_type: TYPE_INT32
    dims: [ -1 ]
  },
  {
    name: "runtime_top_k"
    data_type: TYPE_INT32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    name: "runtime_top_p"
    data_type: TYPE_FP32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    # UNUSED
    name: "beam_search_diversity_rate"
    data_type: TYPE_FP32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    name: "temperature"
    data_type: TYPE_FP32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    # UNUSED
    name: "len_penalty"
    data_type: TYPE_FP32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    # UNUSED
    name: "repetition_penalty"
    data_type: TYPE_FP32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    # UNUSED
    name: "random_seed"
    data_type: TYPE_INT32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    # UNUSED
    name: "is_return_log_probs"
    data_type: TYPE_BOOL
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    # UNUSED
    name: "beam_width"
    data_type: TYPE_INT32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    # UNUSED
    name: "bad_words_list"
    data_type: TYPE_INT32
    dims: [ 2, -1 ]
    optional: true
  },
  {
    # UNUSED
    name: "stop_words_list"
    data_type: TYPE_INT32
    dims: [ 2, -1 ]
    optional: true
  }
]
# Response outputs. dims exclude the batch dimension (max_batch_size is
# non-zero); -1 marks a variable-size dimension.
output [
  {
    name: "output_ids"
    data_type: TYPE_INT32
    dims: [ -1, -1, -1 ]
  },
  {
    name: "sequence_length"
    data_type: TYPE_INT32
    dims: [ -1, -1 ]
  }
  # The outputs below are currently unsupported, but should be supported in
  # the future. When enabling them, also uncomment the separating comma.
  # ,
  # {
  #   name: "cum_log_probs"
  #   data_type: TYPE_FP32
  #   dims: [ -1 ]
  # },
  # {
  #   name: "output_log_probs"
  #   data_type: TYPE_FP32
  #   dims: [ -1, -1 ]
  # }
]
# instance_group tells Triton how many execution instances of this model to
# create and which kind of device each instance is associated with. Here a
# single instance is created with kind KIND_CPU.
# NOTE(review): KIND_CPU only describes how Triton schedules the instance;
# whether the Python model itself moves weights or tensors to a GPU is not
# visible in this file -- confirm against the model code.
instance_group [
  {
    count: 1
    kind: KIND_CPU
  }
]
# Backend parameters. Each entry is a string key/value pair made available to
# the model through the model configuration. Values written as placeholders
# are filled in when this template is rendered (presumably by the project's
# setup tooling -- confirm); the trailing comments show example values.

# NOTE(review): presumably toggles half-precision weights; it is hard-coded
# to "1" here rather than templated -- confirm against the model code.
parameters {
  key: "use_half"
  value: {
    string_value: "1"
  }
}
parameters {
  key: "model_name"
  value: {
    string_value: "${model_name}" # e.g. "codegen-350M-multi"
  }
}
parameters {
  key: "org_name"
  value: {
    string_value: "${org_name}" # e.g. "Salesforce"
  }
}
parameters {
  key: "use_int8"
  value: {
    string_value: "${use_int8}" # e.g. "0" or "1"
  }
}
parameters {
  key: "use_auto_device_map"
  value: {
    string_value: "${use_auto_device_map}" # e.g. "0" or "1"
  }
}