KingNish committed on
Commit
217892e
1 Parent(s): 2de7f04

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +121 -0
app.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import PyPDF2
2
+ from openpyxl import load_workbook
3
+ from pptx import Presentation
4
+ import gradio as gr
5
+ import io
6
+ import docx2python
7
+ from huggingface_hub import InferenceClient
8
+
9
# Module-wide Hugging Face inference client, bound to the Mistral Nemo
# instruct model; chat_document() sends its prompts through it.
client = InferenceClient(model="mistralai/Mistral-Nemo-Instruct-2407")
11
+
12
def _pdf_text(data):
    """Return the text of every page of the PDF held in the bytes *data*."""
    reader = PyPDF2.PdfReader(io.BytesIO(data))
    # A page without extractable text may yield nothing; treat it as empty
    # instead of letting one blank page abort the whole document.
    return ''.join(page.extract_text() or '' for page in reader.pages)


def _xlsx_text(data):
    """Return the space-separated cell values of every sheet in *data*."""
    workbook = load_workbook(io.BytesIO(data))
    values = []
    for sheet in workbook.worksheets:
        for row in sheet.rows:
            # Skip empty cells so the output is not littered with "None".
            values.extend(
                str(cell.value) for cell in row if cell.value is not None
            )
    return ' '.join(values)


def _pptx_text(data):
    """Return the space-separated text of every text-bearing shape in *data*."""
    presentation = Presentation(io.BytesIO(data))
    return ' '.join(
        shape.text
        for slide in presentation.slides
        for shape in slide.shapes
        if hasattr(shape, "text")
    )


def _docx_text(data):
    """Return the full text of the Word document held in the bytes *data*."""
    # The package's entry point is docx2python.docx2python(); it has no
    # `convert` function.
    document = docx2python.docx2python(io.BytesIO(data))
    try:
        return document.text
    finally:
        # Newer docx2python releases hold resources until close() is called.
        close = getattr(document, "close", None)
        if callable(close):
            close()


def _utf8_text(data):
    """Decode the bytes *data* as UTF-8 text."""
    return data.decode('utf-8')


# Lower-cased file extension -> (label used in error messages, extractor).
_DOCUMENT_READERS = {
    'pdf': ('PDF', _pdf_text),
    'xlsx': ('XLSX', _xlsx_text),
    'pptx': ('PPTX', _pptx_text),
    'doc': ('DOC/DOCX', _docx_text),
    'docx': ('DOC/DOCX', _docx_text),
}


def read_document(file):
    """Extract the text content of an uploaded document.

    Args:
        file: The upload to read. Either an object exposing the path on a
            ``name`` attribute, or the path itself as a string.

    Returns:
        The extracted text. PDF, XLSX, PPTX and DOC/DOCX files go through
        their dedicated parsers; any other extension is decoded as UTF-8.
        Failures are not raised: they are reported as a string starting
        with ``"Error reading "``.
    """
    if file is None:
        return "Error reading file: no file was provided"

    # Prefer the `name` attribute; fall back to the value itself so a plain
    # string path is accepted too.
    file_path = getattr(file, "name", None) or str(file)
    file_extension = file_path.rpartition('.')[2].lower()

    try:
        with open(file_path, "rb") as f:
            file_content = f.read()
    except OSError as e:
        return f"Error reading file: {e}"

    label, extract = _DOCUMENT_READERS.get(
        file_extension, ('file', _utf8_text)
    )
    try:
        return extract(file_content)
    except Exception as e:
        # Parser failures are surfaced as text so the UI shows them
        # rather than crashing.
        return f"Error reading {label}: {e}"
75
+
76
def chat_document(file, question):
    """Answer a question about an uploaded document using the chat model.

    Args:
        file: The uploaded document, passed unchanged to read_document().
        question: The user's question about the document.

    Returns:
        The generated answer as one string.
    """
    # Cap the document text so the prompt stays bounded.
    max_chars = 128000
    content = str(read_document(file))[:max_chars]

    # System prompt for the chat API. Written with explicit newlines so the
    # exact prompt text does not depend on source indentation.
    system_prompt = (
        "\n"
        "You are a helpful and informative assistant that can answer questions based on the content of documents.\n"
        "You will receive the content of a document and a question about it.\n"
        "Your task is to provide a concise and accurate answer to the question based solely on the provided document content.\n"
        "If the document does not contain enough information to answer the question, simply state that you cannot answer the question based on the provided information.\n"
    )

    message = (
        f"[INST] [SYSTEM] {system_prompt}\n"
        f"Document Content: {content}\n"
        f"Question: {question}\n"
        "Answer:"
    )

    stream = client.text_generation(
        message,
        max_new_tokens=512,
        stream=True,
        details=True,
        return_full_text=False,
    )
    # Drop special tokens (such as the end-of-sequence marker) so they do
    # not leak into the answer shown to the user.
    return "".join(
        chunk.token.text for chunk in stream if not chunk.token.special
    )
100
+
101
+
102
# Two-tab UI: one tab shows the extracted text of a document, the other
# answers questions about it.
with gr.Blocks() as demo:
    with gr.Tabs():
        # Tab 1: upload a file and display its extracted text.
        with gr.TabItem("Document Reader"):
            iface1 = gr.Interface(
                title="Document Reader",
                description="Upload a document (PDF, XLSX, PPTX, TXT, CSV, DOC, DOCX and Code or text file) to read its content.",
                fn=read_document,
                inputs=gr.File(label="Upload a Document"),
                outputs=gr.Textbox(label="Document Content"),
            )

        # Tab 2: upload a file and ask the model a question about it.
        with gr.TabItem("Document Chat"):
            iface2 = gr.Interface(
                title="Document Chat",
                description="Upload a document and ask questions about its content.",
                fn=chat_document,
                inputs=[
                    gr.File(label="Upload a Document"),
                    gr.Textbox(label="Question"),
                ],
                outputs=gr.Textbox(label="Answer"),
            )

demo.launch()