# Code adapted from https://github.com/AIAdvantage/chatgpt-api-youtube
# To see how to create a virtual environment, check https://python.land/virtual-environments/virtualenv 
# Create the environment with `python -m venv my-envi`; for the next steps, see https://stackoverflow.com/a/74825209
# For installation errors caused by insufficient privileges, see https://stackoverflow.com/questions/66322049/could-not-install-packages-due-to-an-oserror-winerror-2-no-such-file-or-direc
# For gitignore, see https://github.com/github/gitignore/blob/main/Python.gitignore
# If VS Code shows issues about execution policies, you may need to change the execution policies settings in the powershell, see https://www.sharepointdiary.com/2014/03/fix-for-powershell-script-cannot-be-loaded-because-running-scripts-is-disabled-on-this-system.html


import os

import my_api_keys
import gradio as gr

from llama_index import (
    GPTSimpleVectorIndex,
    SimpleDirectoryReader,
    LLMPredictor,
    ServiceContext, 
    download_loader, 
    PromptHelper
)

from llama_index.prompts.prompts import QuestionAnswerPrompt


# documentation of langchain at https://github.com/hwchase17/langchain
from langchain.chat_models import ChatOpenAI
from langchain import OpenAI # if you want to use a model other than gpt-3.5-turbo


# Export the key stored in the local my_api_keys module as OPENAI_API_KEY.
# NOTE(review): presumably the OpenAI-backed classes used below read the key
# from this environment variable -- confirm against the library versions in use.
os.environ['OPENAI_API_KEY'] = my_api_keys.my_open_ai_key


'''
What is this document about
Which countries were affected
How many people injured
When did the earthquake take place
What is the president
What is the date of birth of Germany => Should return no answer
'''


def custom_llama_index(question):
    """Answer *question* using the documents stored under ``./data``.

    On every call the files are reloaded, a vector index is rebuilt over them
    with a custom LLM configuration, and the index is queried once with a
    prompt that asks the model to reply 'I don't know' when it has no answer.

    Args:
        question: The user's question as plain text.

    Returns:
        The fixed hint ``"Please enter a question."`` when *question* is empty
        or only whitespace; otherwise the response object returned by the
        index query made with the custom question-answering prompt.
    """
    # A blank question has nothing to retrieve: return before loading the
    # data, building the index, or issuing any LLM request.
    if not question or not question.strip():
        return "Please enter a question."

    ## Working with llama_index = playing around with data augmentation

    ## Step 1: load the new data
    # documentation of llama_index at https://gpt-index.readthedocs.io/en/latest/
    # data loaders at https://llamahub.ai/
    # Bound to a distinct local name so the module-level SimpleDirectoryReader
    # import is not shadowed inside this function.
    directory_reader_cls = download_loader("SimpleDirectoryReader")
    # Take all the files in the data folder, see https://llamahub.ai/l/file
    loader = directory_reader_cls('./data', recursive=True, exclude_hidden=True)
    documents = loader.load_data()

    ## Step 2: Build a CUSTOM llm index: code adapted from https://github.com/wombyz/custom-knowledge-chatbot/tree/main/custom-knowledge-chatbot
    # Official documentation: https://gpt-index.readthedocs.io/en/latest/how_to/customization/custom_llms.html

    # define prompt helper
    max_input_size = 2048   # maximum input size
    num_output = 256        # number of output tokens
    max_chunk_overlap = 20  # maximum chunk overlap
    prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)

    # define LLM
    llm_predictor = LLMPredictor(llm=OpenAI(temperature=0.5, model_name="text-davinci-002"))
    service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)

    # build index
    custom_index = GPTSimpleVectorIndex.from_documents(documents, service_context=service_context)

    ## Step 3: reuse the custom index to get an answer, with prompt-engineering
    # Code from https://www.linkedin.com/pulse/extending-chatgpt-knowledge-base-custom-datasources-cezar-romaniuc
    QUESTION_ANSWER_PROMPT_TMPL = (
        "You are an assistant that specializes in geographic question answering. If you don't have an answer, answer with 'I don't know' \n"
        "---------------------\n"
        "{context_str}"
        "\n---------------------\n"
        "{query_str}\n"
    )
    QUESTION_ANSWER_PROMPT = QuestionAnswerPrompt(QUESTION_ANSWER_PROMPT_TMPL)

    # Query exactly once, with the custom prompt. An earlier version also ran
    # a plain query whose result was discarded, costing an extra LLM call.
    return custom_index.query(question, text_qa_template=QUESTION_ANSWER_PROMPT)


# Wire the question-answering function into a Gradio interface with one text
# input and one text output.
demo = gr.Interface(
    fn=custom_llama_index,
    inputs="text",
    outputs="text",
)

# Launch the interface as soon as this module is executed.
demo.launch()