Spaces:

MJobe
/

document-vqa-v2

Running

App Files Files Community

MJobe committed on Dec 12, 2023

Commit

6bbd3ca

•

1 Parent(s): 41659bf

Upload 3 files

Browse files

Files changed (3) hide show

Dockerfile +31 -0
main.py +53 -0
requirements.txt +9 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,31 @@

+# Use the official Python 3.9 image
+FROM python:3.9
+# Install the tesseract-ocr-all package (the OCR engine that main.py calls through
+# pytesseract) and remove the apt package lists afterwards
+RUN apt-get update && apt-get install -y \
+ tesseract-ocr-all \
+ && rm -rf /var/lib/apt/lists/*
+# Set the working directory to /code
+WORKDIR /code
+# Copy only requirements.txt into the container at /code
+COPY ./requirements.txt /code/requirements.txt
+# Install the Python dependencies listed in requirements.txt
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+# Set up a new user named "user" with user ID 1000
+RUN useradd -m -u 1000 user
+# Switch to the "user" user
+USER user
+# Set HOME to the user's home directory and put the user's local bin directory first on PATH
+ENV HOME=/home/user \
+ PATH=/home/user/.local/bin:$PATH
+# Set the working directory to the app directory inside the user's home directory
+WORKDIR $HOME/app
+# Copy the current directory contents into the container at $HOME/app setting the owner to the user
+COPY --chown=user . $HOME/app
+# Serve the `app` object from main.py with uvicorn on all interfaces, port 7860
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

main.py ADDED Viewed

	@@ -0,0 +1,53 @@

+from io import BytesIO
+from PIL import Image
+from fastapi import FastAPI, File, UploadFile, Form
+from fastapi.responses import JSONResponse
+from pytesseract import pytesseract
+from transformers import pipeline
+app = FastAPI()
+# Load a BERT-based question answering pipeline
+nlp = pipeline('question-answering', model='bert-large-uncased-whole-word-masking-finetuned-squad')
+description = """
+## Image-based Document QA
+This API extracts text from an uploaded image using OCR and performs document question answering using a BERT-based model.
+### Endpoint:
+- **POST /uploadfile/:** Upload an image file to extract text and answer provided questions.
+"""
+app = FastAPI(docs_url="/", description=description)
+@app.post("/uploadfile/", description=description)
+async def perform_document_qa(
+ file: UploadFile = File(...),
+ questions: str = Form(...),
+):
+ try:
+ # Read the uploaded file
+ contents = await file.read()
+ # Convert binary content to image
+ image = Image.open(BytesIO(contents))
+ # Perform OCR to extract text from the image
+ text_content = pytesseract.image_to_string(image)
+ # Split the questions string into a list
+ question_list = [q.strip() for q in questions.split(',')]
+ # Perform document question answering for each question using BERT-based model
+ answers_dict = {}
+ for question in question_list:
+ result = nlp({
+ 'question': question,
+ 'context': text_content
+ })
+ answers_dict[question] = result['answer']
+ return answers_dict
+ except Exception as e:
+ return JSONResponse(content=f"Error processing file: {str(e)}", status_code=500)

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+# Web framework; the Dockerfile CMD launches it with uvicorn
+fastapi==0.74.*
+# NOTE(review): requests is not imported by main.py in this commit — confirm it is needed
+requests==2.27.*
+uvicorn[standard]==0.17.*
+sentencepiece==0.1.*
+torch==1.11.*
+transformers[vision]==4.*
+pytesseract==0.3.10
+# Needed by FastAPI to parse the multipart form fields and file upload
+python-multipart==0.0.6
+# NOTE(review): PyMuPDF is the only unpinned entry and is not imported by main.py in this commit — confirm it is needed
+PyMuPDF