model_list:
  - model_name: ollama-phi3.5-3.8b
    litellm_params:
      model: ollama/phi3.5:3.8b
      api_base: https://zhengr-ollama.hf.space
      # Model-specific parameters (uncomment and adjust as needed)
      #model: "huggingface/mistralai/Mistral-7B-Instruct-v0.1"
      #api_base: "<your-api-base>"
      #api_key: "<your-api-key>" # [OPTIONAL] for HF inference endpoints
      #initial_prompt_value: "\n"
      #roles: {"system":{"pre_message":"<|im_start|>system\n", "post_message":"<|im_end|>"}, "assistant":{"pre_message":"<|im_start|>assistant\n","post_message":"<|im_end|>"}, "user":{"pre_message":"<|im_start|>user\n","post_message":"<|im_end|>"}}
      #final_prompt_value: "\n"
      #bos_token: "<s>"
      #eos_token: "</s>"
      #max_tokens: 4096
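# With the proxy running, the model above is served through an OpenAI-compatible
# API. A minimal sketch of a request, assuming the proxy's default port 4000
# (adjust host/port to your deployment):
#
#   curl http://0.0.0.0:4000/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "ollama-phi3.5-3.8b", "messages": [{"role": "user", "content": "Hello"}]}'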
litellm_settings: # module-level litellm settings - https://github.com/BerriAI/litellm/blob/main/litellm/__init__.py
  drop_params: True
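  # drop_params=True silently drops request parameters the target provider does
  # not support (e.g. OpenAI-only fields sent to an Ollama model) instead of
  # returning an error.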
general_settings:
  #master_key: sk-1234 # [OPTIONAL] Only use this if you want to require all calls to contain this key (Authorization: Bearer sk-1234)
  #alerting: ["slack"] # [OPTIONAL] If you want Slack alerts for hanging LLM requests, slow LLM responses, and budget alerts. Make sure to set `SLACK_WEBHOOK_URL` in your env
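# To launch the proxy with this configuration (the path below is an assumption;
# point --config at wherever this file lives):
#
#   litellm --config ./config.yaml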