---
license: apache-2.0
datasets:
- oztrkoguz/Short-Story
language:
- en
metrics:
- accuracy
pipeline_tag: text-generation
---

This model generates short stories from a list of keywords. It is a LoRA fine-tune of `unsloth/Phi-3-mini-4k-instruct` on the `oztrkoguz/Short-Story` dataset, merged into the base weights in bfloat16.

Example usage:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the base tokenizer and the merged model
tokenizer_model = "unsloth/Phi-3-mini-4k-instruct"
lora_model = "oztrkoguz/phi3_short_story_merged_bfloat16"

tokenizer = AutoTokenizer.from_pretrained(tokenizer_model)
model = AutoModelForCausalLM.from_pretrained(lora_model).to("cuda")

alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
create a short story from this keywords

### Input:
{}

### Response:
{}"""

# Use the merged model for inference
inputs = tokenizer(
    [
        alpaca_prompt.format(
            "cat, dog, human",
            "",  # output - leave this blank for generation!
        )
    ],
    return_tensors="pt",
).to("cuda")

with torch.no_grad():
    # Note: max_length counts prompt tokens plus generated tokens
    output = model.generate(**inputs, max_length=100)

generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
print(generated_text)
```
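
Since the repository name indicates the merged weights are stored in bfloat16, you can also load them directly in that dtype to roughly halve GPU memory versus the default float32 load. A minimal sketch, assuming a CUDA-capable GPU with bfloat16 support:

```python
import torch
from transformers import AutoModelForCausalLM

# Assumption: the checkpoint is stored in bfloat16, per the repository name.
# torch_dtype keeps the weights in bfloat16 instead of upcasting to float32.
model = AutoModelForCausalLM.from_pretrained(
    "oztrkoguz/phi3_short_story_merged_bfloat16",
    torch_dtype=torch.bfloat16,
).to("cuda")
```

Generation then works exactly as in the example above.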