aurioldegbelo committed on
Commit
b9c769d
1 Parent(s): 148cedf

Upload my_webassistant_103.py

Browse files
Files changed (1) hide show
  1. my_webassistant_103.py +104 -0
my_webassistant_103.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Code adapted from https://github.com/AIAdvantage/chatgpt-api-youtube
2
+ # To see how to create a virtual environment, check https://python.land/virtual-environments/virtualenv
3
+ # python -m venv my-envi, and the next steps, see https://stackoverflow.com/a/74825209
4
+ # For error of installation due to privileges, see https://stackoverflow.com/questions/66322049/could-not-install-packages-due-to-an-oserror-winerror-2-no-such-file-or-direc
5
+ # For gitignore, see https://github.com/github/gitignore/blob/main/Python.gitignore
6
+ # If VS Code shows issues about execution policies, you may need to change the execution policies settings in the powershell, see https://www.sharepointdiary.com/2014/03/fix-for-powershell-script-cannot-be-loaded-because-running-scripts-is-disabled-on-this-system.html
7
+
8
+
9
+ import os
10
+
11
+ import my_api_keys
12
+ import gradio as gr
13
+
14
+ from llama_index import (
15
+ GPTSimpleVectorIndex,
16
+ SimpleDirectoryReader,
17
+ LLMPredictor,
18
+ ServiceContext,
19
+ download_loader,
20
+ PromptHelper
21
+ )
22
+
23
+ from llama_index.prompts.prompts import QuestionAnswerPrompt
24
+
25
+
26
+ # documentation of langchain at https://github.com/hwchase17/langchain
27
+ from langchain.chat_models import ChatOpenAI
28
+ from langchain import OpenAI # if you want to use a model other than gpt-3.5-turbo
29
+
30
+
31
# Make the OpenAI key available to llama_index / langchain, which read it
# from the environment. The key itself lives in the (git-ignored) my_api_keys module.
os.environ['OPENAI_API_KEY'] = my_api_keys.my_open_ai_key


# Sample questions to try in the UI (the original file held these in a bare
# triple-quoted string, which is a no-op statement; comments are clearer):
#   What is this document about
#   Which countries were affected
#   How many people injured
#   When did the earthquake take place
#   Who is the president
#   What is the date of birth of Germany  => should return no answer
42
+
43
+
44
def custom_llama_index(question):
    """Answer *question* against the documents in ./data using a custom LLM index.

    Parameters
    ----------
    question : str
        Natural-language question typed by the user (forwarded from Gradio).

    Returns
    -------
    llama_index Response object produced by querying the vector index with a
    custom geographic question-answering prompt.

    NOTE(review): the index (including document embedding) is rebuilt on every
    call, which is slow and spends API tokens per question; consider building
    it once at module load and reusing it across queries.
    """
    ## Step 1: load the data.
    # Documentation of llama_index: https://gpt-index.readthedocs.io/en/latest/
    # Data loaders: https://llamahub.ai/
    # FIX: SimpleDirectoryReader is already imported at the top of the file;
    # the original code shadowed it with download_loader("SimpleDirectoryReader"),
    # performing a redundant loader download on every call.
    # Take all the files in the data folder, see https://llamahub.ai/l/file
    loader = SimpleDirectoryReader('./data', recursive=True, exclude_hidden=True)
    documents = loader.load_data()

    ## Step 2: build a CUSTOM llm index.
    # Adapted from https://github.com/wombyz/custom-knowledge-chatbot/tree/main/custom-knowledge-chatbot
    # Official docs: https://gpt-index.readthedocs.io/en/latest/how_to/customization/custom_llms.html
    max_input_size = 2048   # maximum input size
    num_output = 256        # number of output tokens
    max_chunk_overlap = 20  # maximum chunk overlap
    prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)

    # Define the LLM (a model other than the default gpt-3.5-turbo).
    llm_predictor = LLMPredictor(llm=OpenAI(temperature=0.5, model_name="text-davinci-002"))
    service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)

    # Build the index over the loaded documents.
    custom_index = GPTSimpleVectorIndex.from_documents(documents, service_context=service_context)

    ## Step 3: query the index with a custom prompt.
    # FIX: the original code first ran a plain custom_index.query(question)
    # whose result was discarded — a duplicate, paid LLM call; removed.
    # Prompt adapted from
    # https://www.linkedin.com/pulse/extending-chatgpt-knowledge-base-custom-datasources-cezar-romaniuc
    QUESTION_ANSWER_PROMPT_TMPL = (
        "You are an assistant that specializes in geographic question answering. If you don't have an answer, answer with 'I don't know' \n"
        "---------------------\n"
        "{context_str}"
        "\n---------------------\n"
        "{query_str}\n"
    )
    QUESTION_ANSWER_PROMPT = QuestionAnswerPrompt(QUESTION_ANSWER_PROMPT_TMPL)

    return custom_index.query(question, text_qa_template=QUESTION_ANSWER_PROMPT)
100
+
101
+
102
# Build the Gradio UI: one text box in (the question), one text box out (the answer).
demo = gr.Interface(fn=custom_llama_index, inputs="text", outputs="text")

# Only start the web server when this file is run as a script, so importing
# the module (e.g. from tests or another app) does not launch a server.
if __name__ == "__main__":
    demo.launch()