MJobe committed on
Commit
6bbd3ca
1 Parent(s): 41659bf

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +31 -0
  2. main.py +53 -0
  3. requirements.txt +9 -0
Dockerfile ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use the official Python 3.9 image
2
+ FROM python:3.9
3
+
4
+ RUN apt-get update && apt-get install -y \
5
+ tesseract-ocr-all \
6
+ && rm -rf /var/lib/apt/lists/*
7
+
8
+ # Set the working directory to /code
9
+ WORKDIR /code
10
+
11
+ # Copy only requirements.txt into the container at /code
12
+ COPY ./requirements.txt /code/requirements.txt
13
+
14
+ # Install requirements.txt
15
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
16
+
17
+ # Set up a new user named "user" with user ID 1000
18
+ RUN useradd -m -u 1000 user
19
+ # Switch to the "user" user
20
+ USER user
21
+ # Set home to the user's home directory
22
+ ENV HOME=/home/user \
23
+ PATH=/home/user/.local/bin:$PATH
24
+
25
+ # Set the working directory to the app directory inside the user's home directory
26
+ WORKDIR $HOME/app
27
+
28
+ # Copy the current directory contents into the container at $HOME/app setting the owner to the user
29
+ COPY --chown=user . $HOME/app
30
+
31
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from io import BytesIO
2
+
3
+ from PIL import Image
4
+ from fastapi import FastAPI, File, UploadFile, Form
5
+ from fastapi.responses import JSONResponse
6
+ from pytesseract import pytesseract
7
+ from transformers import pipeline
8
+
9
# Load a BERT-based question answering pipeline once at import time so that
# every request reuses the same model instance.
nlp = pipeline('question-answering', model='bert-large-uncased-whole-word-masking-finetuned-squad')

# Markdown shown in the generated API documentation.
description = """
## Image-based Document QA
This API extracts text from an uploaded image using OCR and performs document question answering using a BERT-based model.

### Endpoint:
- **POST /uploadfile/:** Upload an image file to extract text and answer provided questions.
"""

# Single application instance; the interactive docs are served at the root path.
app = FastAPI(docs_url="/", description=description)
23
+
24
@app.post("/uploadfile/", description=description)
async def perform_document_qa(
    file: UploadFile = File(...),
    questions: str = Form(...),
):
    """Answer comma-separated questions about the text found in an uploaded image.

    The image is passed through Tesseract OCR and the extracted text is used
    as the context for the module-level question-answering pipeline ``nlp``.

    Args:
        file: Uploaded image file.
        questions: One or more questions separated by commas. Blank entries
            (for example from a trailing comma) are ignored.

    Returns:
        A dict mapping each question to the answer string produced by the
        pipeline, or a ``JSONResponse`` carrying an error message: status 400
        when the request itself cannot be processed (no questions, unreadable
        image, no text recognised) and status 500 for any other failure.
    """
    try:
        # Read the uploaded file
        contents = await file.read()

        # Split the questions string into a list, dropping blank entries that
        # the pipeline would otherwise reject
        question_list = [q.strip() for q in questions.split(',') if q.strip()]
        if not question_list:
            return JSONResponse(
                content="Error processing file: no questions provided",
                status_code=400,
            )

        # Convert binary content to image; a payload PIL cannot identify is a
        # client error rather than a server fault. Only the open call is
        # guarded so OCR failures still surface as 500 below.
        try:
            image = Image.open(BytesIO(contents))
        except OSError as e:
            return JSONResponse(content=f"Error processing file: {str(e)}", status_code=400)

        # Perform OCR to extract text from the image, releasing the image
        # handle as soon as the text has been extracted
        with image:
            text_content = pytesseract.image_to_string(image)

        # The pipeline cannot answer against an empty context
        if not text_content.strip():
            return JSONResponse(
                content="Error processing file: no text could be extracted from the image",
                status_code=400,
            )

        # Perform document question answering for each question using BERT-based model
        return {
            question: nlp(question=question, context=text_content)['answer']
            for question in question_list
        }
    except Exception as e:
        # Top-level boundary: report anything unexpected as a server error
        return JSONResponse(content=f"Error processing file: {str(e)}", status_code=500)
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.74.*
2
+ requests==2.27.*
3
+ uvicorn[standard]==0.17.*
4
+ sentencepiece==0.1.*
5
+ torch==1.11.*
6
+ transformers[vision]==4.*
7
+ pytesseract==0.3.10
8
+ python-multipart==0.0.6
9
+ PyMuPDF