import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Note: gr.load("models/ai-forever/ruGPT-3.5-13B").launch() would spin up a
# separate Inference-API interface and block here, so the custom interface
# below would never run; load the model locally with transformers instead.
model = AutoModelForCausalLM.from_pretrained(   # AutoModel has no LM head; CausalLM is needed for generate()
    "ai-forever/ruGPT-3.5-13B",                 # transformers model id has no "models/" prefix
    torch_dtype=torch.float16,                  # half precision so the 13B weights fit on one GPU
).to('cuda:0')                                  # inputs are sent to cuda:0 below, so the model must live there
tokenizer = AutoTokenizer.from_pretrained("ai-forever/ruGPT-3.5-13B")
print('launch')

def greet(request):
    print('greet')
    encoded_input = tokenizer(request, return_tensors='pt',
                              add_special_tokens=False).to('cuda:0')
    print('encoded_input')
    output = model.generate(
        **encoded_input,
        num_beams=2,
        do_sample=True,
        max_new_tokens=100,
    )
    print('output')
    return tokenizer.decode(output[0], skip_special_tokens=True)

iface = gr.Interface(fn=greet, inputs="text", outputs="text")
iface.launch()