"""Text summarization helpers built on the DistilBART-CNN-12-6 checkpoint."""

import functools

import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline

# The tokenizer and model are loaded once at import time, outside of the
# functions below, so repeated calls do not pay the loading cost again.
SUMMARIZATION_MODEL = "sshleifer/distilbart-cnn-12-6"

# Prefer the first CUDA device; fall back to CPU so the module can still be
# imported on machines without a GPU.
_DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(SUMMARIZATION_MODEL)
model = AutoModelForSeq2SeqLM.from_pretrained(
    SUMMARIZATION_MODEL,
    device_map=_DEVICE,
)


def summarize(text: str, max_len: int = 20) -> str:
    """Summarize ``text`` with the module-level DistilBART-CNN-12-6 model.

    Args:
        text (str): The text to be summarized.
        max_len (int, optional): The maximum number of newly generated
            tokens in the summary. Defaults to 20.

    Returns:
        str: The decoded summary with special tokens removed.
    """
    # Truncate the *input* to the tokenizer's own configured maximum length.
    # ``max_len`` is deliberately not used here: it limits the summary, not
    # the source text.
    encoded = tokenizer(text, return_tensors="pt", truncation=True)

    # Move every input tensor (ids and attention mask) to whichever device
    # the model lives on instead of assuming CUDA.
    encoded = encoded.to(model.device)

    # NOTE(review): the checkpoint's default generation settings may define
    # a minimum length larger than a small ``max_len`` -- confirm the
    # resulting summaries are acceptable for the default of 20.
    outputs = model.generate(
        **encoded,
        max_new_tokens=max_len,
        num_beams=8,
        length_penalty=0.2,
        early_stopping=False,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


@functools.lru_cache(maxsize=None)
def _summarization_pipeline():
    """Build the summarization pipeline once and reuse it on later calls."""
    return pipeline(
        "summarization",
        model=model,
        tokenizer=tokenizer,
    )


def summarizePipeline(text):
    """Summarize ``text`` through the ``transformers`` summarization pipeline.

    The pipeline wraps the module-level ``model`` and ``tokenizer`` and is
    created lazily on first use, then cached for subsequent calls.

    Args:
        text (str): The text to be summarized.

    Returns:
        str: The ``summary_text`` field of the first pipeline result.
    """
    return _summarization_pipeline()(text)[0]["summary_text"]