# "Spaces: Sleeping" — HuggingFace Space status captured during scraping; not code.
# Code adapted from https://github.com/AIAdvantage/chatgpt-api-youtube
# To create a virtual environment, see https://python.land/virtual-environments/virtualenv
# (python -m venv my-envi); for the next steps, see https://stackoverflow.com/a/74825209
# For installation errors caused by insufficient privileges, see https://stackoverflow.com/questions/66322049/could-not-install-packages-due-to-an-oserror-winerror-2-no-such-file-or-direc
# For a Python .gitignore template, see https://github.com/github/gitignore/blob/main/Python.gitignore
# If VS Code reports issues about execution policies, you may need to change the PowerShell execution-policy settings; see https://www.sharepointdiary.com/2014/03/fix-for-powershell-script-cannot-be-loaded-because-running-scripts-is-disabled-on-this-system.html
# Standard library
import os

# Third-party
import gradio as gr
from llama_index import (
    GPTSimpleVectorIndex,
    SimpleDirectoryReader,
    LLMPredictor,
    ServiceContext,
    download_loader,
    PromptHelper,
)
from llama_index.prompts.prompts import QuestionAnswerPrompt
# langchain documentation: https://github.com/hwchase17/langchain
from langchain import OpenAI  # completion models other than gpt-3.5-turbo (e.g. text-davinci-002)
from langchain.chat_models import ChatOpenAI  # available if a chat model is preferred

# Local
import my_api_keys

# llama_index / langchain read the OpenAI key from the environment.
os.environ['OPENAI_API_KEY'] = my_api_keys.my_open_ai_key
'''
Sample questions to try in the UI:
  What is this document about?
  Which countries were affected?
  How many people were injured?
  When did the earthquake take place?
  Who is the president?
  What is the date of birth of Germany? => should return no answer
'''
def custom_llama_index(question):
    """Answer *question* against the documents in ./data via a custom llama_index.

    The vector index is built on the first call and cached on the function
    object, so later queries skip the slow (and paid) loading/embedding step.

    Parameters
    ----------
    question : str
        Natural-language question to answer from the indexed documents.

    Returns
    -------
    llama_index query response
        Answer produced with a custom geography-focused QA prompt that
        replies "I don't know" when the documents contain no answer.
    """
    # Build the index only once: re-reading ./data and re-embedding it on
    # every Gradio request is slow and costs OpenAI credits each time.
    if not hasattr(custom_llama_index, "_index"):
        ## Step 1: load the data.
        # llama_index docs: https://gpt-index.readthedocs.io/en/latest/
        # data loaders: https://llamahub.ai/ (file loader: https://llamahub.ai/l/file)
        # Bind to a local name so we don't shadow the top-level
        # SimpleDirectoryReader import.
        reader_cls = download_loader("SimpleDirectoryReader")
        loader = reader_cls('./data', recursive=True, exclude_hidden=True)
        documents = loader.load_data()

        ## Step 2: build a CUSTOM llm index.
        # Adapted from https://github.com/wombyz/custom-knowledge-chatbot/tree/main/custom-knowledge-chatbot
        # Official docs: https://gpt-index.readthedocs.io/en/latest/how_to/customization/custom_llms.html
        max_input_size = 2048   # maximum prompt size sent to the model
        num_output = 256        # tokens reserved for the generated answer
        max_chunk_overlap = 20  # overlap between consecutive text chunks
        prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
        llm_predictor = LLMPredictor(
            llm=OpenAI(temperature=0.5, model_name="text-davinci-002"))
        service_context = ServiceContext.from_defaults(
            llm_predictor=llm_predictor, prompt_helper=prompt_helper)
        custom_llama_index._index = GPTSimpleVectorIndex.from_documents(
            documents, service_context=service_context)

    ## Step 3: query the cached index with a custom prompt.
    # Prompt-engineering approach from
    # https://www.linkedin.com/pulse/extending-chatgpt-knowledge-base-custom-datasources-cezar-romaniuc
    QUESTION_ANSWER_PROMPT_TMPL = (
        "You are an assistant that specializes in geographic question answering. If you don't have an answer, answer with 'I don't know' \n"
        "---------------------\n"
        "{context_str}"
        "\n---------------------\n"
        "{query_str}\n"
    )
    QUESTION_ANSWER_PROMPT = QuestionAnswerPrompt(QUESTION_ANSWER_PROMPT_TMPL)
    # NOTE(review): the original also ran a plain custom_index.query(question)
    # whose result was discarded — removed to avoid a second paid LLM call.
    return custom_llama_index._index.query(
        question, text_qa_template=QUESTION_ANSWER_PROMPT)
# Keep `demo` at module level (HuggingFace Spaces looks it up by name), but
# only launch the server when this file is executed as a script, so importing
# the module (e.g. for testing) has no side effects.
demo = gr.Interface(fn=custom_llama_index, inputs="text", outputs="text")

if __name__ == "__main__":
    demo.launch()