awacke1 commited on
Commit
18e41a1
β€’
1 Parent(s): 7f947d1

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +658 -0
app.py ADDED
@@ -0,0 +1,658 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import openai
3
+ import os
4
+ import base64
5
+ import glob
6
+ import io
7
+ import json
8
+ import mistune
9
+ import pytz
10
+ import math
11
+ import requests
12
+ import sys
13
+ import time
14
+ import re
15
+ import textract
16
+ import zipfile # New import for zipping files
17
+ from datetime import datetime
18
+ from openai import ChatCompletion
19
+ from xml.etree import ElementTree as ET
20
+ from bs4 import BeautifulSoup
21
+ from collections import deque
22
+ from audio_recorder_streamlit import audio_recorder
23
+ from dotenv import load_dotenv
24
+ from PyPDF2 import PdfReader
25
+ from langchain.text_splitter import CharacterTextSplitter
26
+ from langchain.embeddings import OpenAIEmbeddings
27
+ from langchain.vectorstores import FAISS
28
+ from langchain.chat_models import ChatOpenAI
29
+ from langchain.memory import ConversationBufferMemory
30
+ from langchain.chains import ConversationalRetrievalChain
31
+ from templates import css, bot_template, user_template
32
+ import streamlit.components.v1 as components # Import Streamlit Components for HTML5
33
+
34
# Page config and sidebar widgets are declared up front so every function
# below can see these module-level globals (Streamlit re-runs the whole
# script top-to-bottom on each interaction).
st.set_page_config(page_title="GPT Streamlit Document Reasoner", layout="wide")
# Global save toggle: when unchecked, create_file() becomes a no-op.
should_save = st.sidebar.checkbox("πŸ’Ύ Save", value=True)

# Whisper Paper - how open STT suddenly got so good:
# st link button with emoji anyone?
url="https://arxiv.org/pdf/2212.04356.pdf"
41
import random


def link_button_with_emoji(url):
    """Render a markdown link to the Whisper paper, prefixed with a randomly
    chosen medical emoji (purely cosmetic)."""
    medical_emojis = ("πŸ’‰", "πŸ₯", "🌑️", "🩺", "🌑️", "πŸ”¬", "πŸ’Š", "πŸ§ͺ", "πŸ‘¨β€βš•οΈ", "πŸ‘©β€βš•οΈ")
    picked = random.choice(medical_emojis)
    st.markdown(f"[{picked} Whisper Paper - Robust Speech Recognition via Large-Scale Weak Supervision]({url})")


url = "https://arxiv.org/pdf/2212.04356.pdf"
link_button_with_emoji(url)
48
+
49
+
50
+
51
def generate_filename_old(prompt, file_type):
    # DEPRECATED: superseded by generate_filename(), which also keeps
    # underscores; kept for reference only — no callers in this file.
    central = pytz.timezone('US/Central')
    safe_date_time = datetime.now(central).strftime("%m%d_%H%M")  # Date and time DD-HHMM
    safe_prompt = "".join(x for x in prompt if x.isalnum())[:90]  # Limit file name size and trim whitespace
    return f"{safe_date_time}_{safe_prompt}.{file_type}"  # Return a safe file name
56
+
57
def generate_filename(prompt, file_type):
    """Build a filesystem-safe name of the form <MMDD_HHMM>_<sanitized prompt>.<file_type>.

    Spaces and newlines in the prompt become underscores; everything that is
    not alphanumeric or an underscore is dropped, and the prompt part is
    capped at 90 characters.
    """
    timestamp = datetime.now(pytz.timezone('US/Central')).strftime("%m%d_%H%M")
    underscored = prompt.replace(" ", "_").replace("\n", "_")
    sanitized = "".join(ch for ch in underscored if ch.isalnum() or ch == "_")[:90]
    return f"{timestamp}_{sanitized}.{file_type}"
63
+
64
def transcribe_audio(file_path, model):
    """Send a local audio file to the OpenAI transcription endpoint, chat
    about the resulting transcript, persist the exchange, and return the
    transcript text.

    Parameters:
        file_path: path to a local audio file (e.g. the .wav from the recorder).
        model: transcription model name, e.g. "whisper-1".

    Returns:
        The transcript string on success, or None if the API call failed.
    """
    key = os.getenv('OPENAI_API_KEY')
    headers = {
        "Authorization": f"Bearer {key}",
    }
    OPENAI_API_URL = "https://api.openai.com/v1/audio/transcriptions"
    with open(file_path, 'rb') as f:
        data = {'file': f}
        # Bug fix: the original wrote the literal "{file_path}" because the
        # f-prefix was missing from the format string.
        st.write(f"Read file {file_path}")
        # Post while the file handle is still open so requests can read it.
        response = requests.post(OPENAI_API_URL, headers=headers, files=data, data={'model': model})
    if response.status_code == 200:
        st.write(response.json())
        # Immediately reason over the transcript with the chat model.
        chatResponse = chat_with_model(response.json().get('text'), '')
        transcript = response.json().get('text')
        filename = generate_filename(transcript, 'txt')
        response = chatResponse
        user_prompt = transcript
        create_file(filename, user_prompt, response, should_save)
        return transcript
    else:
        st.write(response.json())
        st.error("Error in API call.")
        return None
90
+
91
def save_and_play_audio(audio_recorder):
    """Capture audio via the supplied recorder widget.

    If anything was recorded, save it to a timestamped .wav file, play it
    back in the UI, and return the filename; otherwise return None.
    """
    audio_bytes = audio_recorder()
    if not audio_bytes:
        return None
    filename = generate_filename("Recording", "wav")
    with open(filename, 'wb') as out:
        out.write(audio_bytes)
    st.audio(audio_bytes, format="audio/wav")
    return filename
100
+
101
+
102
# Define a context dictionary to maintain the state between exec calls:
# create_file() passes this dict as the globals for exec'd code blocks, so
# variables defined in one executed snippet persist into the next.
context = {}
104
+
105
def create_file(filename, prompt, response, should_save=True):
    """Persist a prompt/response exchange as '<base>-Combined.md'.

    Fenced code blocks in the response that mention 'python' are executed
    via exec() using the shared module-level `context` as globals, and the
    captured stdout is appended to the markdown output.

    SECURITY NOTE(review): exec() runs model-generated code with full
    process privileges — do not use on untrusted responses.
    """
    if not should_save:
        return

    # Extract base filename without extension
    base_filename, ext = os.path.splitext(filename)

    # Initialize the combined content
    combined_content = ""

    # Add Prompt with markdown title and emoji
    combined_content += "# Prompt πŸ“\n" + prompt + "\n\n"

    # Add Response with markdown title and emoji
    combined_content += "# Response πŸ’¬\n" + response + "\n\n"

    # Check for code blocks in the response
    resources = re.findall(r"```([\s\S]*?)```", response)
    for resource in resources:
        # Check if the resource contains Python code
        if "python" in resource.lower():
            # Remove the 'python' keyword from the code block
            cleaned_code = re.sub(r'^\s*python', '', resource, flags=re.IGNORECASE | re.MULTILINE)

            # Add Code Results title with markdown and emoji
            combined_content += "# Code Results πŸš€\n"

            # Redirect standard output to capture it
            original_stdout = sys.stdout
            sys.stdout = io.StringIO()

            # Execute the cleaned Python code within the shared context so
            # state persists across successive exec'd snippets.
            try:
                exec(cleaned_code, context)
                code_output = sys.stdout.getvalue()
                combined_content += f"```\n{code_output}\n```\n\n"
                realtimeEvalResponse = "# Code Results πŸš€\n" + "```" + code_output + "```\n\n"
                st.write(realtimeEvalResponse)

            except Exception as e:
                combined_content += f"```python\nError executing Python code: {e}\n```\n\n"

            # Restore the original standard output
            sys.stdout = original_stdout
        else:
            # Add non-Python resources with markdown and emoji
            combined_content += "# Resource πŸ› οΈ\n" + "```" + resource + "```\n\n"

    # Save the combined content to a Markdown file
    if should_save:
        with open(f"{base_filename}-Combined.md", 'w') as file:
            file.write(combined_content)
157
+
158
+
159
def create_file_old2(filename, prompt, response, should_save=True):
    # DEPRECATED: earlier variant of create_file(); unlike the current one it
    # execs code WITHOUT the shared `context` globals and emits extra debug
    # markdown. Kept for reference only — no callers in this file use it.
    if not should_save:
        return

    # Step 2: Extract base filename without extension
    base_filename, ext = os.path.splitext(filename)

    # Step 3: Check if the response contains Python code
    # NOTE(review): computed but never used below.
    has_python_code = bool(re.search(r"```python([\s\S]*?)```", response))

    # Step 4: Initialize the combined content
    combined_content = ""

    # Add Prompt with markdown title and emoji
    combined_content += "# Prompt πŸ“\n" + prompt + "\n\n"

    # Add Response with markdown title and emoji
    combined_content += "# Response πŸ’¬\n" + response + "\n\n"

    # Check for Python code or other resources and add them with markdown title and emoji
    resources = re.findall(r"```([\s\S]*?)```", response)
    for resource in resources:
        # Check if the resource contains Python code
        if "python" in resource.lower():
            st.markdown('# Running python.. ')
            # Remove the word 'python' from the beginning of the code block
            cleaned_code = re.sub(r'^\s*python', '', resource, flags=re.IGNORECASE | re.MULTILINE)

            # Add Code Results title with markdown and emoji
            combined_content += "# Code Results πŸš€\n"

            # Capture standard output
            original_stdout = sys.stdout
            sys.stdout = io.StringIO()

            # Execute cleaned Python code and capture the output
            try:
                st.markdown('# Running exec.. ')

                exec(cleaned_code)
                code_output = sys.stdout.getvalue()
                combined_content += f"```\n{code_output}\n```\n\n"
                realtimeEvalResponse = "# Code Results πŸš€\n" + "```" + code_output + "```\n\n"

                st.write(realtimeEvalResponse)

                st.markdown('# Completed exec.. ')

            except Exception as e:
                combined_content += f"```python\nError executing Python code: {e}\n```\n\n"
                st.markdown('# Error in exec.. ' + combined_content)

            # Restore the original standard output
            sys.stdout = original_stdout
        else:
            # Add Resource title with markdown and emoji for non-Python resources
            combined_content += "# Resource πŸ› οΈ\n" + "```" + resource + "```\n\n"

    # Write the combined content into one file
    with open(f"{base_filename}-Combined.md", 'w') as file:
        file.write(combined_content)
221
+
222
+
223
+
224
def truncate_document(document, length):
    """Return at most the first `length` characters of `document`."""
    head = document[:length]
    return head
226
+
227
def divide_document(document, max_length):
    """Split `document` into consecutive chunks of at most `max_length` characters."""
    chunks = []
    start = 0
    while start < len(document):
        chunks.append(document[start:start + max_length])
        start += max_length
    return chunks
229
+
230
def get_table_download_link(file_path):
    """Build an HTML anchor that downloads `file_path` as a base64 data: URI.

    Returns the raw path (after writing a blank placeholder to the UI) when
    the file cannot be read as text, mirroring the original best-effort
    behavior.
    """
    # Bug fix: the original fell through after a failed read and hit a
    # NameError on the undefined `data`; return early instead.
    try:
        with open(file_path, 'r') as file:
            data = file.read()
    except Exception:
        st.write('')
        return file_path
    b64 = base64.b64encode(data.encode()).decode()
    file_name = os.path.basename(file_path)
    ext = os.path.splitext(file_name)[1]  # get the file extension
    # Map extensions to MIME types; anything unknown downloads as binary.
    mime_map = {
        '.txt': 'text/plain',
        '.py': 'text/plain',
        '.xlsx': 'text/plain',
        '.csv': 'text/plain',
        '.htm': 'text/html',
        '.md': 'text/markdown',
    }
    mime_type = mime_map.get(ext, 'application/octet-stream')
    href = f'<a href="data:{mime_type};base64,{b64}" target="_blank" download="{file_name}">{file_name}</a>'
    return href
256
+
257
def CompressXML(xml_text):
    """Parse `xml_text`, drop every element whose tag contains 'Comment',
    and return the re-serialized XML string.

    Bug fix: ElementTree elements have no `.parent` attribute, so the
    original `elem.parent.remove(elem)` raised AttributeError whenever a
    matching element existed. A child->parent map is built instead.
    """
    root = ET.fromstring(xml_text)
    parent_map = {child: parent for parent in root.iter() for child in parent}
    for elem in list(root.iter()):
        if isinstance(elem.tag, str) and 'Comment' in elem.tag:
            parent = parent_map.get(elem)
            if parent is not None:
                parent.remove(elem)
    return ET.tostring(root, encoding='unicode', method="xml")
263
+
264
def read_file_content(file, max_length):
    """Extract text from an uploaded file based on its MIME type.

    Supported: JSON, HTML, XML (comment-tag-stripped via CompressXML),
    Markdown (rendered with mistune), and plain text. Anything else yields
    an empty string. `max_length` is accepted for interface compatibility
    but is not applied here (sectioning happens in divide_document).
    """
    mime = file.type
    if mime == "application/json":
        return str(json.load(file))
    if mime in ("text/html", "text/htm"):
        soup = BeautifulSoup(file, "html.parser")
        return soup.text
    if mime in ("application/xml", "text/xml"):
        parsed = ET.parse(file)
        return CompressXML(ET.tostring(parsed.getroot(), encoding='unicode'))
    if mime in ("text/markdown", "text/md"):
        renderer = mistune.create_markdown()
        return renderer(file.read().decode())
    if mime == "text/plain":
        return file.getvalue().decode()
    return ""
284
+
285
def readitaloud(result):
    """Render a small embedded HTML5 page with `result` in a textarea and a
    button that speaks it via the browser's SpeechSynthesis API."""
    # Opening shell of the page: title, the readAloud() script, and the
    # opening <textarea> tag; `result` is spliced in as the textarea body.
    documentHTML5='''
<!DOCTYPE html>
<html>
<head>
<title>Read It Aloud</title>
<script type="text/javascript">
function readAloud() {
const text = document.getElementById("textArea").value;
const speech = new SpeechSynthesisUtterance(text);
window.speechSynthesis.speak(speech);
}
</script>
</head>
<body>
<h1>πŸ”Š Read It Aloud</h1>
<textarea id="textArea" rows="10" cols="80">
'''
    documentHTML5 = documentHTML5 + result
    # Closing half of the page: end of textarea plus the trigger button.
    documentHTML5 = documentHTML5 + '''
</textarea>
<br>
<button onclick="readAloud()">πŸ”Š Read Aloud</button>
</body>
</html>
'''

    # Embed as an 800x300 iframe inside the Streamlit page.
    components.html(documentHTML5, width=800, height=300)
    #return result
314
+
315
def chat_with_model(prompt, document_section, model_choice='gpt-3.5-turbo'):
    """Stream a chat completion and render it incrementally in the UI.

    Parameters:
        prompt: the user's question/instructions (user turn).
        document_section: optional document context, appended as an
            assistant turn when non-empty.
        model_choice: OpenAI chat model name.

    Returns:
        The full concatenated reply text (also handed to readitaloud()).
    """
    model = model_choice
    conversation = [{'role': 'system', 'content': 'You are a helpful assistant.'}]
    conversation.append({'role': 'user', 'content': prompt})
    if len(document_section) > 0:
        conversation.append({'role': 'assistant', 'content': document_section})

    start_time = time.time()
    report = []
    res_box = st.empty()
    collected_chunks = []
    collected_messages = []

    key = os.getenv('OPENAI_API_KEY')
    openai.api_key = key
    # Bug fix: the model was hard-coded to 'gpt-3.5-turbo', silently ignoring
    # the caller's model_choice (the sidebar radio had no effect). Pass the
    # selected model through instead.
    for chunk in openai.ChatCompletion.create(
            model=model,
            messages=conversation,
            temperature=0.5,
            stream=True):
        collected_chunks.append(chunk)  # save the event response
        chunk_message = chunk['choices'][0]['delta']  # extract the message
        collected_messages.append(chunk_message)  # save the message

        content = chunk["choices"][0].get("delta", {}).get("content")
        try:
            report.append(content)
            if len(content) > 0:
                result = "".join(report).strip()
                #result = result.replace("\n", "")
                res_box.markdown(f'*{result}*')
        except:
            # `content` is None on role/terminator chunks; skip rendering.
            st.write(' ')

    full_reply_content = ''.join([m.get('content', '') for m in collected_messages])
    st.write("Elapsed time:")
    st.write(time.time() - start_time)
    readitaloud(full_reply_content)
    return full_reply_content
357
+
358
def chat_with_file_contents(prompt, file_content, model_choice='gpt-3.5-turbo'):
    """Run a single (non-streaming) chat completion over `prompt`, with
    `file_content` supplied as assistant-turn context when non-empty."""
    messages = [
        {'role': 'system', 'content': 'You are a helpful assistant.'},
        {'role': 'user', 'content': prompt},
    ]
    if len(file_content) > 0:
        messages.append({'role': 'assistant', 'content': file_content})
    completion = openai.ChatCompletion.create(model=model_choice, messages=messages)
    return completion['choices'][0]['message']['content']
365
+
366
def extract_mime_type(file):
    """Return the MIME type for `file`.

    Accepts either the repr-string of an uploaded file (parses the
    type='...' field) or any object exposing a `.type` attribute (such as
    a Streamlit UploadedFile).

    Raises:
        ValueError: a string input has no type='...' field.
        TypeError: the input is neither a string nor type-bearing object.
    """
    if isinstance(file, str):
        match = re.search(r"type='(.*?)'", file)
        if match:
            return match.group(1)
        raise ValueError(f"Unable to extract MIME type from {file}")
    # Bug fix: the original checked isinstance(file, streamlit.UploadedFile),
    # but the module is imported as `st`, so `streamlit` raised NameError.
    # Duck-type on the `.type` attribute instead, which covers UploadedFile.
    if hasattr(file, "type"):
        return file.type
    raise TypeError("Input should be a string or a streamlit.UploadedFile object")
380
+
381
+ from io import BytesIO
382
+ import re
383
+
384
def extract_file_extension(file):
    """Return everything after the FIRST '.' in the upload's file name.

    Note: for 'a.tar.gz' this yields 'tar.gz' (matching the original
    lazy-regex behavior). Raises ValueError when the name has no dot.
    """
    file_name = file.name
    _, dot, extension = file_name.partition(".")
    if not dot:
        raise ValueError(f"Unable to extract file extension from {file_name}")
    return extension
393
+
394
def pdf2txt(docs):
    """Concatenate extractable text from a list of uploaded files.

    Text-like extensions (py/txt/html/htm/xml/json) are decoded as UTF-8;
    PDFs go through PyPDF2 page-by-page extraction. Per-file errors are
    reported to the UI and that file is skipped, so one bad file does not
    abort the whole batch.
    """
    text = ""
    for file in docs:
        file_extension = extract_file_extension(file)
        # print the file extension
        st.write(f"File type extension: {file_extension}")

        # read the file according to its extension
        try:
            if file_extension.lower() in ['py', 'txt', 'html', 'htm', 'xml', 'json']:
                text += file.getvalue().decode('utf-8')
            elif file_extension.lower() == 'pdf':
                from PyPDF2 import PdfReader
                pdf = PdfReader(BytesIO(file.getvalue()))
                for page in range(len(pdf.pages)):
                    text += pdf.pages[page].extract_text()  # new PyPDF2 syntax
        except Exception as e:
            st.write(f"Error processing file {file.name}: {e}")

    return text
414
+
415
def pdf2txt_old(pdf_docs):
    # DEPRECATED: earlier PDF-only extractor, superseded by pdf2txt();
    # kept for reference. First loop just surfaces each file's MIME type
    # for debugging, second loop does the actual extraction.
    st.write(pdf_docs)
    for file in pdf_docs:
        mime_type = extract_mime_type(file)
        st.write(f"MIME type of file: {mime_type}")

    text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            text += page.extract_text()
    return text
427
+
428
def txt2chunks(text):
    """Split raw text on newlines into ~1000-char chunks with 200-char overlap,
    sized for embedding."""
    splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len)
    return splitter.split_text(text)
431
+
432
def vector_store(text_chunks):
    """Embed the text chunks with OpenAI embeddings and index them in an
    in-memory FAISS store."""
    api_key = os.getenv('OPENAI_API_KEY')
    embedder = OpenAIEmbeddings(openai_api_key=api_key)
    return FAISS.from_texts(texts=text_chunks, embedding=embedder)
436
+
437
def get_chain(vectorstore):
    """Wire a ChatOpenAI LLM, a chat-history buffer memory, and the vector
    store's retriever into a conversational retrieval chain."""
    chat_llm = ChatOpenAI()
    chat_memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
    return ConversationalRetrievalChain.from_llm(
        llm=chat_llm,
        retriever=vectorstore.as_retriever(),
        memory=chat_memory,
    )
441
+
442
def process_user_input(user_question):
    """Run the stored conversational retrieval chain on `user_question`,
    render the alternating user/bot chat history, and save each exchange
    to a timestamped file.

    Assumes st.session_state.conversation was set by get_chain() — TODO
    confirm callers guard for the unset case.
    """
    response = st.session_state.conversation({'question': user_question})
    st.session_state.chat_history = response['chat_history']
    for i, message in enumerate(st.session_state.chat_history):
        # Even indices are user turns, odd indices are bot turns.
        template = user_template if i % 2 == 0 else bot_template
        st.write(template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
        # Save file output from PDF query results
        filename = generate_filename(user_question, 'txt')
        #create_file(filename, user_question, message.content)
        response = message.content
        user_prompt = user_question
        create_file(filename, user_prompt, response, should_save)
        #st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
455
+
456
def divide_prompt(prompt, max_length):
    """Greedily pack whitespace-separated words into chunks whose character
    budget (word lengths plus separating spaces) stays within max_length.

    Returns a list of space-joined chunk strings.
    """
    chunks = []
    current_chunk = []
    current_length = 0
    for word in prompt.split():
        if current_length + len(word) <= max_length:
            current_chunk.append(word)
            current_length += len(word) + 1  # +1 accounts for the space
        else:
            chunks.append(' '.join(current_chunk))
            current_chunk = [word]
            current_length = len(word)
    chunks.append(' '.join(current_chunk))  # flush the final chunk
    return chunks
471
+
472
def create_zip_of_files(files):
    """Zip the given file paths into 'all_files.zip' in the current working
    directory and return that archive's name."""
    zip_name = "all_files.zip"
    with zipfile.ZipFile(zip_name, 'w') as archive:
        for path in files:
            archive.write(path)
    return zip_name
481
+
482
+
483
def get_zip_download_link(zip_file):
    """Return an HTML anchor that downloads `zip_file` via a base64 data: URI."""
    with open(zip_file, 'rb') as fh:
        payload = fh.read()
    encoded = base64.b64encode(payload).decode()
    return f'<a href="data:application/zip;base64,{encoded}" download="{zip_file}">Download All</a>'
492
+
493
+
494
def main():
    """Top-level Streamlit page: sidebar options, audio transcription,
    prompt box, per-section document chat, and a saved-file history
    sidebar with per-file view/search/delete actions."""
    #openai.api_key = os.getenv('OPENAI_API_KEY')

    # File type for output, model choice
    menu = ["txt", "htm", "xlsx", "csv", "md", "py"]
    choice = st.sidebar.selectbox("Output File Type:", menu)
    model_choice = st.sidebar.radio("Select Model:", ('gpt-3.5-turbo', 'gpt-3.5-turbo-0301'))

    # Audio, transcribe, GPT:
    filename = save_and_play_audio(audio_recorder)
    if filename is not None:
        # Best-effort: a failed transcription still leaves the recording
        # downloadable from the sidebar.
        try:
            transcription = transcribe_audio(filename, "whisper-1")
        except:
            st.write(' ')
        st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
        filename = None

    # prompt interfaces
    user_prompt = st.text_area("Enter prompts, instructions & questions:", '', height=100)

    # file section interface for prompts against large documents as context
    collength, colupload = st.columns([2,3])  # adjust the ratio as needed
    with collength:
        max_length = st.slider("File section length for large files", min_value=1000, max_value=128000, value=12000, step=1000)
    with colupload:
        uploaded_file = st.file_uploader("Add a file for context:", type=["pdf", "xml", "json", "xlsx", "csv", "html", "htm", "md", "txt"])


    # Document section chat: the upload is split into max_length sections
    # and each section can be chatted about independently.
    document_sections = deque()
    document_responses = {}
    if uploaded_file is not None:
        file_content = read_file_content(uploaded_file, max_length)
        document_sections.extend(divide_document(file_content, max_length))
    if len(document_sections) > 0:
        if st.button("πŸ‘οΈ View Upload"):
            st.markdown("**Sections of the uploaded file:**")
            for i, section in enumerate(list(document_sections)):
                st.markdown(f"**Section {i+1}**\n{section}")
        st.markdown("**Chat with the model:**")
        for i, section in enumerate(list(document_sections)):
            if i in document_responses:
                st.markdown(f"**Section {i+1}**\n{document_responses[i]}")
            else:
                if st.button(f"Chat about Section {i+1}"):
                    st.write('Reasoning with your inputs...')
                    response = chat_with_model(user_prompt, section, model_choice)
                    st.write('Response:')
                    st.write(response)
                    document_responses[i] = response
                    filename = generate_filename(f"{user_prompt}_section_{i+1}", choice)
                    create_file(filename, user_prompt, response, should_save)
                    st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)

    if st.button('πŸ’¬ Chat'):
        st.write('Reasoning with your inputs...')

        #response = chat_with_model(user_prompt, ''.join(list(document_sections,)), model_choice)

        # Divide the user_prompt into smaller sections so each request
        # stays under the configured size limit.
        user_prompt_sections = divide_prompt(user_prompt, max_length)
        full_response = ''
        for prompt_section in user_prompt_sections:
            # Process each section with the model
            response = chat_with_model(prompt_section, ''.join(list(document_sections)), model_choice)
            full_response += response + '\n'  # Combine the responses

        #st.write('Response:')
        #st.write(full_response)

        response = full_response
        st.write('Response:')
        st.write(response)

        filename = generate_filename(user_prompt, choice)
        create_file(filename, user_prompt, response, should_save)
        st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)

    # Saved-output history: long names only (generated names are >= 20 chars),
    # newest-style ordering by extension then name, descending.
    all_files = glob.glob("*.*")
    all_files = [file for file in all_files if len(os.path.splitext(file)[0]) >= 20]  # exclude files with short names
    all_files.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True)  # sort by file type and file name in descending order


    # Sidebar buttons Download All and Delete All
    colDownloadAll, colDeleteAll = st.sidebar.columns([3,3])
    with colDownloadAll:
        if st.button("⬇️ Download All"):
            zip_file = create_zip_of_files(all_files)
            st.markdown(get_zip_download_link(zip_file), unsafe_allow_html=True)
    with colDeleteAll:
        if st.button("πŸ—‘ Delete All"):
            for file in all_files:
                os.remove(file)
            st.experimental_rerun()

    # Sidebar of Files Saving History and surfacing files as context of prompts and responses
    file_contents=''
    next_action=''
    for file in all_files:
        col1, col2, col3, col4, col5 = st.sidebar.columns([1,6,1,1,1])  # adjust the ratio as needed
        with col1:
            if st.button("🌐", key="md_"+file):  # md emoji button
                with open(file, 'r') as f:
                    file_contents = f.read()
                next_action='md'
        with col2:
            st.markdown(get_table_download_link(file), unsafe_allow_html=True)
        with col3:
            if st.button("πŸ“‚", key="open_"+file):  # open emoji button
                with open(file, 'r') as f:
                    file_contents = f.read()
                next_action='open'
        with col4:
            if st.button("πŸ”", key="read_"+file):  # search emoji button
                with open(file, 'r') as f:
                    file_contents = f.read()
                next_action='search'
        with col5:
            if st.button("πŸ—‘", key="delete_"+file):
                os.remove(file)
                st.experimental_rerun()

    # Dispatch on the action chosen above for the last-clicked file.
    if len(file_contents) > 0:
        if next_action=='open':
            file_content_area = st.text_area("File Contents:", file_contents, height=500)
        if next_action=='md':
            st.markdown(file_contents)
        if next_action=='search':
            file_content_area = st.text_area("File Contents:", file_contents, height=500)
            st.write('Reasoning with your inputs...')
            response = chat_with_model(user_prompt, file_contents, model_choice)
            filename = generate_filename(file_contents, choice)
            create_file(filename, user_prompt, response, should_save)

            st.experimental_rerun()
            #st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
633
+
634
if __name__ == "__main__":
    main()

# NOTE(review): everything below the __main__ guard is top-level code and
# still runs on every import / Streamlit rerun, regardless of the guard —
# including load_dotenv() being called only AFTER main() has already run.
# Presumably intentional for this two-section app layout, but verify.
load_dotenv()
st.write(css, unsafe_allow_html=True)

st.header("Chat with documents :books:")
user_question = st.text_input("Ask a question about your documents:")
if user_question:
    process_user_input(user_question)

# Second feature: multi-file upload -> chunk -> embed -> FAISS index, stored
# in session state for process_user_input() above.
with st.sidebar:
    st.subheader("Your documents")
    docs = st.file_uploader("import documents", accept_multiple_files=True)
    with st.spinner("Processing"):
        raw = pdf2txt(docs)
        if len(raw) > 0:
            length = str(len(raw))
            text_chunks = txt2chunks(raw)
            vectorstore = vector_store(text_chunks)
            st.session_state.conversation = get_chain(vectorstore)
            st.markdown('# AI Search Index of Length:' + length + ' Created.')  # add timing
            filename = generate_filename(raw, 'txt')
            create_file(filename, raw, '', should_save)
            #create_file(filename, raw, '')