"""FastAPI service that answers questions with a local Ollama model."""

import logging
from functools import lru_cache

from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.llms import Ollama

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI()

MODEL_NAME = "tinyllama"
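
# Optional sketch: the model name could come from the environment instead of
# being hard-coded. OLLAMA_MODEL is a hypothetical variable name (using it
# would also require `import os` above):
#
#   MODEL_NAME = os.environ.get("OLLAMA_MODEL", "tinyllama")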


@lru_cache()
def get_llm():
    """Build the Ollama client once and reuse it across all requests."""
    # The stdout handler echoes generated tokens to the server console, which
    # is handy for local debugging. `callbacks` replaces the deprecated
    # `callback_manager` argument.
    return Ollama(model=MODEL_NAME, callbacks=[StreamingStdOutCallbackHandler()])


class Question(BaseModel):
    text: str


@app.get("/")
def read_root():
    return {"Hello": f"Welcome to {MODEL_NAME} FastAPI"}


@app.post("/ask")
async def ask_question(question: Question):
    """Return the model's complete answer in a single JSON response."""
    try:
        logger.info(f"Received question: {question.text}")
        llm = get_llm()
        # Use the async variant so the event loop is not blocked while the
        # model generates.
        response = await llm.ainvoke(question.text)
        logger.info("Response generated successfully")
        return {"answer": response}
    except Exception as e:
        logger.error(f"Error in /ask endpoint: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e


@app.post("/ask_stream")
async def ask_question_stream(question: Question):
    """Stream the model's answer to the client chunk by chunk."""
    try:
        logger.info(f"Received question for streaming: {question.text}")
        llm = get_llm()
    except Exception as e:
        logger.error(f"Error in /ask_stream endpoint: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e

    async def generate():
        # Exceptions raised in here occur after the response has started, so
        # they can no longer be turned into an HTTP error status; log them
        # instead of raising HTTPException.
        full_response = ""
        try:
            async for chunk in llm.astream(question.text):
                full_response += chunk
                yield chunk
            logger.info(f"Full streamed response: {full_response}")
        except Exception:
            logger.exception("Error while streaming response")
            raise

    return StreamingResponse(generate(), media_type="text/plain")
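
# Minimal client-side sketch for consuming the stream (assumes the `httpx`
# package is installed; not part of this service):
#
#   import httpx
#
#   with httpx.stream("POST", "http://127.0.0.1:8000/ask_stream",
#                     json={"text": "Why is the sky blue?"}) as resp:
#       for chunk in resp.iter_text():
#           print(chunk, end="", flush=True)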


@app.on_event("startup")
async def startup_event():
    logger.info(f"Starting up with model: {MODEL_NAME}")
    # Warm the lru_cache so the first request does not pay the client
    # construction cost.
    get_llm()


@app.on_event("shutdown")
async def shutdown_event():
    logger.info("Shutting down")
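
# --- Usage sketch ------------------------------------------------------------
# Assumes this file is saved as app.py, an Ollama server is running locally,
# and the tinyllama model has been pulled:
#
#   ollama pull tinyllama
#   uvicorn app:app --reload
#
#   curl -X POST http://127.0.0.1:8000/ask \
#        -H "Content-Type: application/json" \
#        -d '{"text": "Why is the sky blue?"}'
#
#   # -N disables curl's buffering so chunks appear as they arrive.
#   curl -N -X POST http://127.0.0.1:8000/ask_stream \
#        -H "Content-Type: application/json" \
#        -d '{"text": "Why is the sky blue?"}'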