# gpt_space / app.py
# Hugging Face file-viewer header (kept for provenance):
#   author: aurioldegbelo
#   commit a92ea26: "Rename my_webassistant_103.py to app.py"
#   size: 3.96 kB
# Code adapted from https://github.com/AIAdvantage/chatgpt-api-youtube
# To see how to create a virtual environment, check https://python.land/virtual-environments/virtualenv
# For `python -m venv my-envi` and the steps that follow, see https://stackoverflow.com/a/74825209
# For installation errors caused by insufficient privileges, see https://stackoverflow.com/questions/66322049/could-not-install-packages-due-to-an-oserror-winerror-2-no-such-file-or-direc
# For gitignore, see https://github.com/github/gitignore/blob/main/Python.gitignore
# If VS Code shows issues about execution policies, you may need to change the execution policies settings in the powershell, see https://www.sharepointdiary.com/2014/03/fix-for-powershell-script-cannot-be-loaded-because-running-scripts-is-disabled-on-this-system.html
import os
import my_api_keys
import gradio as gr
from llama_index import (
GPTSimpleVectorIndex,
SimpleDirectoryReader,
LLMPredictor,
ServiceContext,
download_loader,
PromptHelper
)
from llama_index.prompts.prompts import QuestionAnswerPrompt
# documentation of langchain at https://github.com/hwchase17/langchain
from langchain.chat_models import ChatOpenAI
from langchain import OpenAI # if you want to use a model other than gpt-3.5-turbo
# Publish the OpenAI key read from the my_api_keys module as the
# OPENAI_API_KEY environment variable.
os.environ["OPENAI_API_KEY"] = my_api_keys.my_open_ai_key
'''
What is this document about
Which countries were affected
How many people injured
When did the earthquake take place
What is the president
What is the date of birth of Germany => Should return no answer
'''
def custom_llama_index(question):
    """Answer ``question`` using the documents found under ``./data``.

    On every call the function loads the files below ``./data``
    (recursively, with ``exclude_hidden=True``), builds a
    ``GPTSimpleVectorIndex`` over them with a custom LLM configuration, and
    queries the index once with a prompt template that instructs the model
    to answer 'I don't know' when it has no answer.

    Args:
        question: The question text to ask about the loaded documents.

    Returns:
        The response returned by ``custom_index.query`` for the custom
        question-answering prompt.
    """
    ## Working with llama_index = playing around with data augmentation

    ## Step 1: load the new data
    # documentation of llama_index at https://gpt-index.readthedocs.io/en/latest/
    # data loaders at https://llamahub.ai/
    # The loader class is fetched at call time; a distinct local name avoids
    # shadowing the module-level SimpleDirectoryReader import.
    directory_reader_cls = download_loader("SimpleDirectoryReader")
    # Take all the files in the data folder, see https://llamahub.ai/l/file
    loader = directory_reader_cls('./data', recursive=True, exclude_hidden=True)
    documents = loader.load_data()

    ## Step 2: Build a CUSTOM llm index: code adapted from
    ## https://github.com/wombyz/custom-knowledge-chatbot/tree/main/custom-knowledge-chatbot
    # Official documentation: https://gpt-index.readthedocs.io/en/latest/how_to/customization/custom_llms.html
    # define prompt helper
    max_input_size = 2048   # maximum input size
    num_output = 256        # number of output tokens
    max_chunk_overlap = 20  # maximum chunk overlap
    prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)

    # define LLM
    llm_predictor = LLMPredictor(
        llm=OpenAI(temperature=0.5, model_name="text-davinci-002")
    )
    service_context = ServiceContext.from_defaults(
        llm_predictor=llm_predictor,
        prompt_helper=prompt_helper,
    )

    # build index
    # NOTE: the documents are reloaded and the index is rebuilt on each call.
    custom_index = GPTSimpleVectorIndex.from_documents(
        documents, service_context=service_context
    )

    ## Step 3: reuse the custom index to get some answers
    # Only the prompt-engineered query is issued. The earlier plain
    # `custom_index.query(question)` call was removed because its result was
    # never used, so it only added a redundant query per question.
    # Code from https://www.linkedin.com/pulse/extending-chatgpt-knowledge-base-custom-datasources-cezar-romaniuc
    QUESTION_ANSWER_PROMPT_TMPL = (
        "You are an assistant that specializes in geographic question answering. If you don't have an answer, answer with 'I don't know' \n"
        "---------------------\n"
        "{context_str}"
        "\n---------------------\n"
        "{query_str}\n"
    )
    QUESTION_ANSWER_PROMPT = QuestionAnswerPrompt(QUESTION_ANSWER_PROMPT_TMPL)
    response_with_custom_prompt = custom_index.query(
        question, text_qa_template=QUESTION_ANSWER_PROMPT
    )
    return response_with_custom_prompt
# Expose custom_llama_index through a Gradio interface with a text input and
# a text output, then launch it.
demo = gr.Interface(
    fn=custom_llama_index,
    inputs="text",
    outputs="text",
)
demo.launch()