import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# gr.load("models/ai-forever/ruGPT-3.5-13B").launch() would serve the hosted
# demo for this model and block here, so nothing below it would ever run; the
# script builds its own Interface at the bottom instead.
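
# The bare AutoModel class has no language-modelling head and cannot be used
# with .generate(); AutoModelForCausalLM is the right class here. Note that
# from_pretrained() takes the plain Hub id, without the "models/" prefix that
# gr.load() uses, and loading a 13B model onto a single GPU assumes the card
# has enough memory for it.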
model = AutoModelForCausalLM.from_pretrained("ai-forever/ruGPT-3.5-13B").to("cuda:0")
tokenizer = AutoTokenizer.from_pretrained("ai-forever/ruGPT-3.5-13B")
print('model and tokenizer loaded')
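
# greet() is the inference callback for the Gradio Interface below: it
# tokenizes the prompt, generates a continuation on the GPU, and decodes the
# result back to text.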
def greet(request):
    print('greet')
    # Tokenize the prompt and move the tensors to the same device as the model.
    encoded_input = tokenizer(request, return_tensors='pt',
                              add_special_tokens=False).to('cuda:0')
    print('encoded_input')
    # num_beams=2 with do_sample=True gives beam-search multinomial sampling,
    # so repeated calls can return different continuations of up to 100 tokens.
    output = model.generate(
        **encoded_input,
        num_beams=2,
        do_sample=True,
        max_new_tokens=100
    )
    print('output')
    # Drop special tokens so only readable text reaches the UI.
    return tokenizer.decode(output[0], skip_special_tokens=True)
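
# A minimal UI: "text" is shorthand for a gr.Textbox component, so the app
# shows one input box and one output box wired to greet().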
iface = gr.Interface(fn=greet, inputs="text", outputs="text")
iface.launch()
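
# launch() serves the app on a local web server (http://127.0.0.1:7860 by
# default); pass share=True to get a temporary public URL.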