zhengr committed
Commit df054a5
1 parent: 553f5a5
Files changed (3):
  1. Dockerfile (+1, -0)
  2. config.yaml (+13, -0)
  3. entrypoint.sh (+12, -11)
Dockerfile CHANGED
@@ -15,6 +15,7 @@ RUN chmod -R 777 /.ollama
 WORKDIR /.ollama

 # Copy the entry point script
+COPY config.yaml /config.yaml
 COPY entrypoint.sh /entrypoint.sh
 RUN chmod +x /entrypoint.sh

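The added COPY stages the LiteLLM config at the image root, where the updated entrypoint.sh expects it (`litellm --config /config.yaml`). A minimal local smoke test, assuming a hypothetical image tag and LiteLLM's default proxy port (4000 in recent releases):

    # "ollama-litellm" is only an example tag, not part of this repo
    docker build -t ollama-litellm .
    docker run -p 4000:4000 ollama-litellm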
config.yaml ADDED
@@ -0,0 +1,13 @@
+model_list:
+  - model_name: gollama-mistral-7b
+    litellm_params:
+      model: ollama/mistral:7b
+      api_base: https://zhengr-ollama.hf.space
+
+litellm_settings: # module-level litellm settings - https://github.com/BerriAI/litellm/blob/main/litellm/__init__.py
+  drop_params: True
+  success_callback: ["langfuse"] # OPTIONAL - if you want to start sending LLM logs to Langfuse. Make sure to set `LANGFUSE_PUBLIC_KEY` and `LANGFUSE_SECRET_KEY` in your env
+
+general_settings:
+  master_key: sk-1234 # [OPTIONAL] Only use this if you want to require all calls to contain this key (Authorization: Bearer sk-1234)
+  alerting: ["slack"] # [OPTIONAL] If you want Slack alerts for hanging LLM requests, slow LLM responses, or budget alerts. Make sure to set `SLACK_WEBHOOK_URL` in your env
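With this config, the LiteLLM proxy exposes an OpenAI-compatible API: requests for the alias gollama-mistral-7b are routed to ollama/mistral:7b at the api_base above, and master_key requires every call to send that key as a bearer token. A minimal sketch of a test request, assuming the proxy listens on LiteLLM's default port (4000):

    curl http://localhost:4000/v1/chat/completions \
      -H "Content-Type: application/json" \
      -H "Authorization: Bearer sk-1234" \
      -d '{"model": "gollama-mistral-7b", "messages": [{"role": "user", "content": "Hello"}]}'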
entrypoint.sh CHANGED
@@ -2,21 +2,22 @@

 # Starting server
 echo "Starting server"
-ollama serve &
-sleep 1
+#ollama serve &
+#sleep 1

 # Splitting the models by comma and pulling each
-IFS=',' read -ra MODELS <<< "$model"
-for m in "${MODELS[@]}"; do
-echo "Pulling $m"
-ollama pull "$m"
-sleep 5
-echo "Running $m"
-ollama run "$m" --keepalive -1s
+# IFS=',' read -ra MODELS <<< "$model"
+# for m in "${MODELS[@]}"; do
+# echo "Pulling $m"
+# ollama pull "$m"
+# sleep 5
+# echo "Running $m"
+# ollama run "$m" --keepalive -1s
 # No need to sleep here unless you want to give some delay between each pull for some reason
-done
+# done

-litellm --model ollama/"$m" --drop_params
+#litellm --model ollama/"$m" --drop_params
+litellm --config /config.yaml

 # Keep the script running to prevent the container from exiting
 wait
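Net effect: the script no longer starts a local `ollama serve` or pulls models; it only launches the LiteLLM proxy, which forwards requests to the remote api_base from config.yaml. Since litellm now runs in the foreground, the trailing wait is effectively a no-op safeguard. One way to check that the container came up, assuming the default port and the master key above:

    # LiteLLM's health endpoint; expects the master key when one is configured
    curl http://localhost:4000/health -H "Authorization: Bearer sk-1234"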