"""Invoke Large Language Models (LLMs) deployed in watsonx.ai.

author: Elena Lowery

This code sample shows how to invoke Large Language Models (LLMs) deployed
in watsonx.ai.
Documentation:
https://ibm.github.io/watson-machine-learning-sdk/foundation_models.html

You will need to provide your IBM Cloud API key and a watsonx.ai project id
(any project) for accessing watsonx.ai in a .env file. The code reads them
from the variables ``api_key`` and ``project_id``.

This example shows simple use cases without comprehensive prompt tuning.
"""

# Install the wml api in your Python environment prior to running this example:
# pip install ibm-watson-machine-learning
# pip install ibm-cloud-sdk-core
# pip install python-dotenv
# pip install gradio

# For reading credentials from the .env file
import os

from dotenv import load_dotenv

# WML python SDK
from ibm_watson_machine_learning.foundation_models import Model
from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
from ibm_watson_machine_learning.foundation_models.utils.enums import ModelTypes, DecodingMethods

# For invocation of LLM with REST API
import json

import requests
from ibm_cloud_sdk_core import IAMTokenManager

# For creating Gradio interface
import gradio as gr

# URL of the hosted LLMs is hardcoded because at this time all LLMs share the same endpoint
url = "https://us-south.ml.cloud.ibm.com"

# These global variables are updated by get_credentials()
# watsonx.ai project id (read from the "project_id" variable)
watsonx_project_id = ""
# IBM Cloud API key (read from the "api_key" variable)
api_key = ""


def get_credentials() -> None:
    """Load the API key and project id from the environment / .env file.

    Calls ``load_dotenv()`` and then stores the values of the ``api_key`` and
    ``project_id`` environment variables in the module-level ``api_key`` and
    ``watsonx_project_id`` variables, which ``get_model`` reads.

    Raises:
        ValueError: If either variable is unset or empty, so the problem is
            reported here rather than when the model is constructed.
    """
    global api_key, watsonx_project_id

    load_dotenv()

    # Update the global variables that will be used for authentication in another function
    api_key = os.getenv("api_key")
    watsonx_project_id = os.getenv("project_id")

    required = (("api_key", api_key), ("project_id", watsonx_project_id))
    missing = [name for name, value in required if not value]
    if missing:
        raise ValueError(
            "Missing required setting(s): "
            + ", ".join(missing)
            + ". Define them in a .env file or as environment variables."
        )


def get_model(model_type, max_tokens, min_tokens, decoding, temperature):
    """Create an LLM model object with the specified generation parameters.

    Args:
        model_type: Model identifier passed to ``Model`` as ``model_id``.
        max_tokens: Value for ``GenParams.MAX_NEW_TOKENS``.
        min_tokens: Value for ``GenParams.MIN_NEW_TOKENS``.
        decoding: Value for ``GenParams.DECODING_METHOD``.
        temperature: Value for ``GenParams.TEMPERATURE``.

    Returns:
        A ``Model`` configured with the module-level ``api_key``, ``url`` and
        ``watsonx_project_id``; call ``get_credentials()`` first so these are
        populated.
    """
    generate_params = {
        GenParams.MAX_NEW_TOKENS: max_tokens,
        GenParams.MIN_NEW_TOKENS: min_tokens,
        GenParams.DECODING_METHOD: decoding,
        GenParams.TEMPERATURE: temperature,
    }

    model = Model(
        model_id=model_type,
        params=generate_params,
        credentials={
            "apikey": api_key,
            "url": url,
        },
        project_id=watsonx_project_id,
    )

    return model


def generate_response(model_type, prompt, max_tokens, min_tokens, decoding, temperature):
    """Generate text for ``prompt`` with a freshly built model.

    Builds the model via ``get_model`` and calls ``model.generate``.

    Returns:
        The ``generated_text`` field of the first entry in the response's
        ``results`` list.
    """
    model = get_model(model_type, max_tokens, min_tokens, decoding, temperature)
    generated_response = model.generate(prompt=prompt)
    return generated_response['results'][0]['generated_text']


def demo_LLM_invocation(prompt, model_type="google/flan-ul2", max_tokens=300,
                        min_tokens=50, decoding="sample", temperature=0.7):
    """Load credentials, then return the model's generated text for ``prompt``.

    Credentials are reloaded on every call, so changes to the environment are
    picked up between invocations.
    """
    get_credentials()
    response = generate_response(model_type, prompt, max_tokens, min_tokens, decoding, temperature)
    return response


# Gradio interface
def gradio_interface(prompt):
    """Gradio callback: run the demo invocation with its default settings."""
    response = demo_LLM_invocation(prompt)
    return response


# Create a Gradio app
iface = gr.Interface(
    fn=gradio_interface,
    inputs="text",
    outputs="text",
    title="🌠 Test watsonx.ai LLM",
    description="Ask a question and get a response from the IBM Watson LLM. For example: 'What is IBM?'"
)

if __name__ == "__main__":
    iface.launch()