dragneel2074 committed
Commit: d7ecf2b
Parent(s): fe312ba

using tinyllama
app.py CHANGED
@@ -14,7 +14,7 @@ logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
 app = FastAPI()
-MODEL_NAME = '
+MODEL_NAME = 'tinyllama'
 
 @lru_cache()
 def get_llm():
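The hunk swaps the served model name to 'tinyllama', but the body of get_llm() and the request handlers sit outside the diff. A minimal sketch of how such a cached, Ollama-backed FastAPI app might look, assuming the official ollama Python client and a hypothetical /generate route (neither appears in the commit):

import logging
from functools import lru_cache

import ollama  # assumption: official Ollama Python client
from fastapi import FastAPI

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI()
MODEL_NAME = 'tinyllama'

@lru_cache()
def get_llm():
    # Cache a single client so repeated requests reuse one connection;
    # by default it talks to the local server at http://localhost:11434.
    return ollama.Client()

@app.get("/generate")  # hypothetical route, not shown in the commit
def generate(prompt: str):
    client = get_llm()
    response = client.chat(
        model=MODEL_NAME,
        messages=[{"role": "user", "content": prompt}],
    )
    return {"reply": response["message"]["content"]}

The lru_cache() decorator from the real file makes get_llm() construct the client once and hand back the same instance on every call.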
start.sh CHANGED
@@ -9,8 +9,8 @@ export CUDA_VISIBLE_DEVICES=0 # Use the first GPU if available
 ollama serve & # Use GPU 0 if available
 
 # Pull the model if not already present
-if ! ollama list | grep -q "
-ollama pull
+if ! ollama list | grep -q "tinyllama"; then
+    ollama pull tinyllama
 fi
 
 # Wait for Ollama to start up (use a more robust check)
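The trailing comment asks for "a more robust check" that Ollama has started, but that code falls outside the hunk. One plausible shape for it, as a sketch only, is to poll the server's HTTP API before pulling the model; /api/tags is Ollama's standard list endpoint, and the retry count and sleep interval here are assumptions:

# Sketch of a startup wait, assuming Ollama's default port 11434.
for i in $(seq 1 30); do
    if curl -sf http://localhost:11434/api/tags > /dev/null; then
        echo "Ollama is up"
        break
    fi
    echo "Waiting for Ollama to start (attempt $i)..."
    sleep 1
done

Polling the API instead of sleeping a fixed number of seconds avoids both racing a slow startup and waiting longer than necessary on a fast one.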