init
- Dockerfile +1 -0
- config.yaml +13 -0
- entrypoint.sh +12 -11
Dockerfile
CHANGED
@@ -15,6 +15,7 @@ RUN chmod -R 777 /.ollama
 WORKDIR /.ollama
 
 # Copy the entry point script
+COPY config.yaml /config.yaml
 COPY entrypoint.sh /entrypoint.sh
 RUN chmod +x /entrypoint.sh
 
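The only Dockerfile change is baking the new LiteLLM config into the image. For a local sanity check, the image can be built and run the usual way (a sketch; the image tag is arbitrary, and the port mapping assumes LiteLLM's default proxy port 4000):

# Hypothetical local build/run; tag name and port are assumptions, not part of this commit.
docker build -t ollama-litellm-space .
docker run --rm -p 4000:4000 ollama-litellm-space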
config.yaml
ADDED
@@ -0,0 +1,13 @@
+model_list:
+  - model_name: gollama-mistral-7b
+    litellm_params:
+      model: ollama/mistral:7b
+      api_base: https://zhengr-ollama.hf.space
+
+litellm_settings: # module level litellm settings - https://github.com/BerriAI/litellm/blob/main/litellm/__init__.py
+  drop_params: True
+  success_callback: ["langfuse"] # OPTIONAL - if you want to start sending LLM logs to Langfuse. Make sure to set `LANGFUSE_PUBLIC_KEY` and `LANGFUSE_SECRET_KEY` in your env
+
+general_settings:
+  master_key: sk-1234 # [OPTIONAL] Only use this if you want to require all calls to contain this key (Authorization: Bearer sk-1234)
+  alerting: ["slack"] # [OPTIONAL] If you want Slack alerts for hanging LLM requests, slow LLM responses, budget alerts. Make sure to set `SLACK_WEBHOOK_URL` in your env
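With this config, the proxy exposes gollama-mistral-7b through an OpenAI-compatible API, routes it to the Ollama model at api_base, and (because master_key is set) requires that key as a bearer token. A minimal smoke test, assuming the proxy listens on LiteLLM's default port 4000 and localhost stands in for the Space URL:

# Hypothetical smoke test against the running proxy; host and prompt are placeholders.
curl http://localhost:4000/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-1234" \
  -d '{"model": "gollama-mistral-7b", "messages": [{"role": "user", "content": "Hello"}]}'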
entrypoint.sh
CHANGED
@@ -2,21 +2,22 @@
 
 # Starting server
 echo "Starting server"
-ollama serve &
-sleep 1
+#ollama serve &
+#sleep 1
 
 # Splitting the models by comma and pulling each
-IFS=',' read -ra MODELS <<< "$model"
-for m in "${MODELS[@]}"; do
-    echo "Pulling $m"
-    ollama pull "$m"
-    sleep 5
-    echo "Running $m"
-    ollama run "$m" --keepalive -1s
+# IFS=',' read -ra MODELS <<< "$model"
+# for m in "${MODELS[@]}"; do
+#     echo "Pulling $m"
+#     ollama pull "$m"
+#     sleep 5
+#     echo "Running $m"
+#     ollama run "$m" --keepalive -1s
 # No need to sleep here unless you want to give some delay between each pull for some reason
-done
+# done
 
-litellm --model ollama/"$m" --drop_params
+#litellm --model ollama/"$m" --drop_params
+litellm --config /config.yaml
 
 # Keep the script running to prevent the container from exiting
 wait
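The rewritten entrypoint no longer starts a local Ollama server or pulls models; it only launches the LiteLLM proxy against /config.yaml, which forwards requests to the remote Space named in api_base. Once the container is up, liveness can be checked with the proxy's health route (a sketch; the route is LiteLLM's documented liveness endpoint, and the port again assumes the default 4000):

# Hypothetical liveness check; host and port are assumptions.
curl http://localhost:4000/health/liveliness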