"""
author: Elena Lowery

This code sample shows how to invoke Large Language Models (LLMs) deployed in watsonx.ai.
Documentation: https://ibm.github.io/watson-machine-learning-sdk/foundation_models.html
You will need to provide your IBM Cloud API key and a watsonx.ai project id (any project)
for accessing watsonx.ai in a .env file
This example shows simple use cases without comprehensive prompt tuning
"""

# Install the wml api in your Python environment prior to running this example:
# pip install ibm-watson-machine-learning
# pip install ibm-cloud-sdk-core
# pip install python-dotenv
# pip install gradio

# For reading credentials from the .env file
import os
from dotenv import load_dotenv

# WML python SDK
from ibm_watson_machine_learning.foundation_models import Model
from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
from ibm_watson_machine_learning.foundation_models.utils.enums import ModelTypes, DecodingMethods

# For invocation of LLM with REST API
import requests, json
from ibm_cloud_sdk_core import IAMTokenManager

# For creating Gradio interface
import gradio as gr

# URL of the hosted LLMs is hardcoded because at this time all LLMs share the same endpoint
url = "https://us-south.ml.cloud.ibm.com"

# These global variables are placeholders: get_credentials() overwrites both
# with values read from the environment (after loading the .env file).
# watsonx.ai project id, taken from the `project_id` environment variable
watsonx_project_id = ""
# IBM Cloud API key, taken from the `api_key` environment variable
api_key = ""

def get_credentials():
    """Load the watsonx.ai credentials from the environment into module globals.

    Calls ``load_dotenv()`` and then reads the ``api_key`` and ``project_id``
    environment variables, storing them in the module-level ``api_key`` and
    ``watsonx_project_id`` variables used when the model object is created.

    Raises:
        ValueError: If either environment variable is missing or empty, so the
            problem is reported here instead of as an authentication failure
            later on.
    """
    global api_key, watsonx_project_id

    load_dotenv()
    loaded = {
        "api_key": os.getenv("api_key"),
        "project_id": os.getenv("project_id"),
    }

    # Fail early with an actionable message rather than passing None downstream.
    missing = [name for name, value in loaded.items() if not value]
    if missing:
        raise ValueError(
            "Missing required setting(s): " + ", ".join(missing)
            + ". Define them in a .env file or as environment variables."
        )

    api_key = loaded["api_key"]
    watsonx_project_id = loaded["project_id"]

# The get_model function creates an LLM model object with the specified parameters
def get_model(model_type, max_tokens, min_tokens, decoding, temperature):
    """Build a ``Model`` object configured with the given generation settings.

    Args:
        model_type: Identifier passed to ``Model`` as ``model_id``.
        max_tokens: Value for the ``MAX_NEW_TOKENS`` generation parameter.
        min_tokens: Value for the ``MIN_NEW_TOKENS`` generation parameter.
        decoding: Value for the ``DECODING_METHOD`` generation parameter.
        temperature: Value for the ``TEMPERATURE`` generation parameter.

    Returns:
        The constructed ``Model`` instance, bound to the module-level
        ``api_key``, ``url`` and ``watsonx_project_id``.
    """
    # Generation parameters, keyed by the SDK's parameter meta-names
    settings = {}
    settings[GenParams.MAX_NEW_TOKENS] = max_tokens
    settings[GenParams.MIN_NEW_TOKENS] = min_tokens
    settings[GenParams.DECODING_METHOD] = decoding
    settings[GenParams.TEMPERATURE] = temperature

    # Authentication details come from the module-level globals
    auth = {"apikey": api_key, "url": url}

    return Model(
        model_id=model_type,
        params=settings,
        credentials=auth,
        project_id=watsonx_project_id,
    )

def generate_response(model_type, prompt, max_tokens, min_tokens, decoding, temperature):
    """Create a model via ``get_model`` and return the text generated for ``prompt``.

    The remaining arguments are forwarded unchanged to ``get_model``.

    Returns:
        The ``generated_text`` field of the first entry in the ``results``
        list of the generate call's response.
    """
    llm = get_model(model_type, max_tokens, min_tokens, decoding, temperature)
    payload = llm.generate(prompt=prompt)
    # Only the first result is used
    first_result = payload['results'][0]
    return first_result['generated_text']

def demo_LLM_invocation(prompt, model_type="google/flan-ul2", max_tokens=300, min_tokens=50, decoding="sample", temperature=0.7):
    """Load credentials, then return the model's generated text for ``prompt``.

    All arguments other than ``prompt`` have defaults and are forwarded to
    ``generate_response``.
    """
    # Credentials are (re)loaded on every invocation before calling the model
    get_credentials()
    return generate_response(
        model_type=model_type,
        prompt=prompt,
        max_tokens=max_tokens,
        min_tokens=min_tokens,
        decoding=decoding,
        temperature=temperature,
    )

# Gradio interface
def gradio_interface(prompt):
    """Callback for the Gradio UI: return the generated text for ``prompt``.

    Delegates to ``demo_LLM_invocation`` using its default settings.
    """
    return demo_LLM_invocation(prompt)

# Create a Gradio app
# One text input and one text output; the input is handled by gradio_interface
iface = gr.Interface(
    title="🌠 Test watsonx.ai LLM",
    description="Ask a question and get a response from the IBM Watson LLM. For example: 'What is IBM?'",
    fn=gradio_interface,
    inputs="text",
    outputs="text",
)

# Launch the app only when this file is executed as a script, not on import
if __name__ == "__main__":
    iface.launch()