|
""" |
|
author: Elena Lowery |
|
|
|
This code sample shows how to invoke Large Language Models (LLMs) deployed in watsonx.ai. |
|
Documentation: https://ibm.github.io/watson-machine-learning-sdk/foundation_models.html |
|
You will need to provide your IBM Cloud API key and a watsonx.ai project id (any project)
|
for accessing watsonx.ai in a .env file |
|
This example shows simple use cases without comprehensive prompt tuning |
|
""" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os |
|
from dotenv import load_dotenv |
|
|
|
|
|
from ibm_watson_machine_learning.foundation_models import Model |
|
from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams |
|
from ibm_watson_machine_learning.foundation_models.utils.enums import ModelTypes, DecodingMethods |
|
|
|
|
|
import requests, json |
|
from ibm_cloud_sdk_core import IAMTokenManager |
|
|
|
|
|
import gradio as gr |
|
|
|
|
|
# watsonx.ai inference endpoint (Dallas / us-south region).
url = "https://us-south.ml.cloud.ibm.com"




# Placeholders only; get_credentials() overwrites both module-level values
# with the entries read from the .env file.
watsonx_project_id = ""



api_key = ""
|
|
|
def get_credentials():
    """Load the IBM Cloud API key and watsonx.ai project id from a .env file.

    Reads the ``api_key`` and ``project_id`` entries and stores them in the
    module-level ``api_key`` and ``watsonx_project_id`` variables used by
    the rest of the script.
    """
    # Use the `global` statement instead of mutating globals() directly —
    # same effect, but idiomatic and visible to linters/readers.
    global api_key, watsonx_project_id

    load_dotenv()

    # os.getenv returns None when a key is missing; authentication will then
    # fail downstream, so the .env file must define both entries.
    api_key = os.getenv("api_key", None)
    watsonx_project_id = os.getenv("project_id", None)
|
|
|
|
|
def get_model(model_type, max_tokens, min_tokens, decoding, temperature):
    """Return a watsonx.ai foundation-model handle configured for generation.

    Builds the generation-parameter mapping from the given arguments and
    creates a ``Model`` bound to the module-level credentials and project id.
    """
    params = {
        GenParams.MAX_NEW_TOKENS: max_tokens,
        GenParams.MIN_NEW_TOKENS: min_tokens,
        GenParams.DECODING_METHOD: decoding,
        GenParams.TEMPERATURE: temperature,
    }

    # Credentials come from the module-level globals set by get_credentials().
    credentials = {"apikey": api_key, "url": url}

    return Model(
        model_id=model_type,
        params=params,
        credentials=credentials,
        project_id=watsonx_project_id,
    )
|
|
|
def generate_response(model_type, prompt, max_tokens, min_tokens, decoding, temperature):
    """Send *prompt* to the requested model and return the generated text."""
    llm = get_model(model_type, max_tokens, min_tokens, decoding, temperature)
    result = llm.generate(prompt=prompt)
    # The SDK returns a dict; the text lives under results[0].generated_text.
    return result['results'][0]['generated_text']
|
|
|
def demo_LLM_invocation(prompt, model_type="google/flan-ul2", max_tokens=300, min_tokens=50, decoding="sample", temperature=0.7):
    """Load credentials, invoke the LLM with *prompt*, and return its reply.

    The keyword defaults give a sensible demo configuration; callers may
    override any of them.
    """
    get_credentials()
    return generate_response(model_type, prompt, max_tokens, min_tokens, decoding, temperature)
|
|
|
|
|
def gradio_interface(prompt):
    """Gradio callback: forward the user's prompt to the LLM demo and return the reply."""
    return demo_LLM_invocation(prompt)
|
|
|
|
|
# Minimal Gradio UI: one text box in, generated text out.
_iface_config = dict(
    fn=gradio_interface,
    inputs="text",
    outputs="text",
    title="🌠 Test watsonx.ai LLM",
    description="Ask a question and get a response from the IBM Watson LLM. For example: 'What is IBM?'",
)

iface = gr.Interface(**_iface_config)
|
|
|
# Launch the Gradio web UI only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":

    iface.launch()