#!/bin/bash
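
# Entrypoint: start the Ollama server, make sure the target model is in the
# local store, then launch the FastAPI app with uvicorn.

# Exit on errors, unset variables, and failed pipelines.
set -euo pipefail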

# Cap the OpenMP/MKL thread pools so CPU-side work does not oversubscribe the
# host, and expose only the first GPU to the runtime.
export OMP_NUM_THREADS=4
export MKL_NUM_THREADS=4
export CUDA_VISIBLE_DEVICES=0

# Start the Ollama server in the background; its HTTP API listens on
# http://localhost:11434 by default.
ollama serve &

# Poll the Ollama API until it responds, giving up after max_attempts seconds.
max_attempts=30
attempt=0
while ! curl -s http://localhost:11434/api/tags >/dev/null; do
    sleep 1
    attempt=$((attempt + 1))
    if [ "$attempt" -eq "$max_attempts" ]; then
        echo "Ollama failed to start within ${max_attempts} seconds. Exiting."
        exit 1
    fi
done

echo "Ollama is ready."

# Pull the model only if it is not already in the local store. This runs after
# the readiness wait because `ollama list` and `ollama pull` are client
# commands that need the server to be up.
if ! ollama list | grep -q "llama3.2:1b"; then
    ollama pull llama3.2:1b
fi
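
# Optional smoke test (a sketch, left commented out): confirm the model
# answers one prompt via Ollama's standard /api/generate endpoint before
# exposing the web app.
# curl -s http://localhost:11434/api/generate \
#     -d '{"model": "llama3.2:1b", "prompt": "ping", "stream": false}' >/dev/null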

echo "Starting API on http://0.0.0.0:7860"

# Run the FastAPI app on all interfaces. --limit-concurrency caps simultaneous
# connections per worker process; excess requests get HTTP 503 instead of
# piling onto the Ollama backend. exec lets uvicorn replace the shell so it
# receives container signals directly.
exec uvicorn app:app --host 0.0.0.0 --port 7860 --workers 4 --limit-concurrency 20