Spaces:
Sleeping
Sleeping
aurioldegbelo
committed on
Commit
•
b9c769d
1
Parent(s):
148cedf
Upload my_webassistant_103.py
Browse files- my_webassistant_103.py +104 -0
my_webassistant_103.py
ADDED
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Code adapted from https://github.com/AIAdvantage/chatgpt-api-youtube
|
2 |
+
# To see how to create a virtual environment, check https://python.land/virtual-environments/virtualenv
|
3 |
+
# python -m venv my-envi, and the next steps, see https://stackoverflow.com/a/74825209
|
4 |
+
# For error of installation due to privileges, see https://stackoverflow.com/questions/66322049/could-not-install-packages-due-to-an-oserror-winerror-2-no-such-file-or-direc
|
5 |
+
# For gitignore, see https://github.com/github/gitignore/blob/main/Python.gitignore
|
6 |
+
# If VS Code shows issues about execution policies, you may need to change the execution policies settings in the powershell, see https://www.sharepointdiary.com/2014/03/fix-for-powershell-script-cannot-be-loaded-because-running-scripts-is-disabled-on-this-system.html
|
7 |
+
|
8 |
+
|
9 |
+
import os
|
10 |
+
|
11 |
+
import my_api_keys
|
12 |
+
import gradio as gr
|
13 |
+
|
14 |
+
from llama_index import (
|
15 |
+
GPTSimpleVectorIndex,
|
16 |
+
SimpleDirectoryReader,
|
17 |
+
LLMPredictor,
|
18 |
+
ServiceContext,
|
19 |
+
download_loader,
|
20 |
+
PromptHelper
|
21 |
+
)
|
22 |
+
|
23 |
+
from llama_index.prompts.prompts import QuestionAnswerPrompt
|
24 |
+
|
25 |
+
|
26 |
+
# documentation of langchain at https://github.com/hwchase17/langchain
|
27 |
+
from langchain.chat_models import ChatOpenAI
|
28 |
+
from langchain import OpenAI # if you want to use a model other than gpt-3.5-turbo
|
29 |
+
|
30 |
+
|
31 |
+
os.environ['OPENAI_API_KEY'] = my_api_keys.my_open_ai_key
|
32 |
+
|
33 |
+
|
34 |
+
'''
|
35 |
+
What is this document about
|
36 |
+
Which countries were affected
|
37 |
+
How many people injured
|
38 |
+
When did the earthquake take place
|
39 |
+
What is the president
|
40 |
+
what is the data of birth of Germany => Should return no answer
|
41 |
+
'''
|
42 |
+
|
43 |
+
|
44 |
+
def custom_llama_index (question):
|
45 |
+
|
46 |
+
## Working with llama_index = playing around with data augmentation
|
47 |
+
|
48 |
+
## Step 1: load the new data
|
49 |
+
# documentation of llama_index at https://gpt-index.readthedocs.io/en/latest/
|
50 |
+
# data loaders at https://llamahub.ai/
|
51 |
+
#from llama_index import download_loader, GPTSimpleVectorIndex
|
52 |
+
|
53 |
+
SimpleDirectoryReader = download_loader("SimpleDirectoryReader")
|
54 |
+
# Take all the files in the data folder, see https://llamahub.ai/l/file
|
55 |
+
loader = SimpleDirectoryReader('./data', recursive=True, exclude_hidden=True)
|
56 |
+
documents = loader.load_data()
|
57 |
+
#print(documents)
|
58 |
+
|
59 |
+
|
60 |
+
## Step 2: Build a CUSTOM llm index: code adapted from https://github.com/wombyz/custom-knowledge-chatbot/tree/main/custom-knowledge-chatbot
|
61 |
+
# Official documentation: https://gpt-index.readthedocs.io/en/latest/how_to/customization/custom_llms.html
|
62 |
+
|
63 |
+
# define prompt helper
|
64 |
+
# set maximum input size
|
65 |
+
max_input_size = 2048
|
66 |
+
# set number of output tokens
|
67 |
+
num_output = 256
|
68 |
+
# set maximum chunk overlap
|
69 |
+
max_chunk_overlap = 20
|
70 |
+
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
|
71 |
+
|
72 |
+
# define LLM
|
73 |
+
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0.5, model_name="text-davinci-002"))
|
74 |
+
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
|
75 |
+
|
76 |
+
# build index
|
77 |
+
custom_index = GPTSimpleVectorIndex.from_documents(documents, service_context=service_context)
|
78 |
+
|
79 |
+
|
80 |
+
## Step 3: reuse the custom index to get some answers
|
81 |
+
# get response from query
|
82 |
+
response = custom_index.query(question)
|
83 |
+
|
84 |
+
|
85 |
+
# If we want to include prompt-engineering
|
86 |
+
# Code from https://www.linkedin.com/pulse/extending-chatgpt-knowledge-base-custom-datasources-cezar-romaniuc
|
87 |
+
QUESTION_ANSWER_PROMPT_TMPL = (
|
88 |
+
"You are an assistant that specializes in geographic question answering. If you don't have an answer, answer with 'I don't know' \n"
|
89 |
+
"---------------------\n"
|
90 |
+
"{context_str}"
|
91 |
+
"\n---------------------\n"
|
92 |
+
"{query_str}\n"
|
93 |
+
)
|
94 |
+
QUESTION_ANSWER_PROMPT = QuestionAnswerPrompt(QUESTION_ANSWER_PROMPT_TMPL)
|
95 |
+
|
96 |
+
response_with_custom_prompt = custom_index.query(question, text_qa_template=QUESTION_ANSWER_PROMPT)
|
97 |
+
|
98 |
+
|
99 |
+
return response_with_custom_prompt
|
100 |
+
|
101 |
+
|
102 |
+
demo = gr.Interface(fn=custom_llama_index, inputs="text", outputs="text")
|
103 |
+
|
104 |
+
demo.launch()
|