AmirMohseni committed
Commit 9bfa0e0
Parent: 50adbf3

Update README.md

Files changed (1): README.md (+24 −4)
README.md CHANGED

````diff
@@ -71,19 +71,39 @@ Here is how you can use this model:
 ```python
 from peft import PeftModel
 from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
 
+# Define the base model and the adapter model
 base_model = "meta-llama/Meta-Llama-3.1-8B-Instruct"
 adapter_model = "AmirMohseni/Llama-3.1-8B-Instruct-Persian-finetuned-sft"
 
-model = AutoModelForCausalLM.from_pretrained(base_model)
+# Load the base model and apply the adapter model using PEFT
+model = AutoModelForCausalLM.from_pretrained(base_model, device_map={"": 0})
 model = PeftModel.from_pretrained(model, adapter_model)
 
+# Check if CUDA is available, otherwise use CPU
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model = model.to(device)
+
+# Load the tokenizer
 tokenizer = AutoTokenizer.from_pretrained(base_model)
 
+# Add a new pad token if necessary
+if tokenizer.pad_token is None:
+    tokenizer.add_special_tokens({'pad_token': '[PAD]'})  # Adding a distinct pad token
+
 # Example usage
-prompt = "راه‌های تقویت حافظه چیست؟"
-inputs = tokenizer(prompt, return_tensors="pt")
-outputs = model.generate(**inputs)
+input_text = "چطوری میتونم به اطلاعات درباره ی سهام شرکت های آمریکایی دست پیدا کنم؟"
+
+# Tokenize the input and get both input IDs and attention mask
+inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)
+input_ids = inputs['input_ids'].to(device)
+attention_mask = inputs['attention_mask'].to(device)
+
+# Generate text
+outputs = model.generate(input_ids, attention_mask=attention_mask, max_length=512, pad_token_id=tokenizer.pad_token_id)
+
+# Decode and print the output
 response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 print(response)
 ```
````
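Note: the updated snippet feeds a raw string into `generate`, while Meta-Llama-3.1-8B-Instruct is a chat model that normally expects its built-in chat template. Below is a minimal sketch, not part of this commit, of driving the same adapter through `tokenizer.apply_chat_template`; the dtype, `device_map="auto"`, and `max_new_tokens` settings are illustrative assumptions, and the Persian prompt translates to "How can I get access to information about American companies' stocks?"

```python
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base_model = "meta-llama/Meta-Llama-3.1-8B-Instruct"
adapter_model = "AmirMohseni/Llama-3.1-8B-Instruct-Persian-finetuned-sft"

# Load the base model and attach the LoRA adapter
# (bf16 and device_map="auto" are assumptions to fit an 8B model on one GPU)
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
model = PeftModel.from_pretrained(model, adapter_model)
tokenizer = AutoTokenizer.from_pretrained(base_model)

# "How can I get access to information about American companies' stocks?"
messages = [
    {"role": "user", "content": "چطوری میتونم به اطلاعات درباره ی سهام شرکت های آمریکایی دست پیدا کنم؟"}
]

# Render the conversation with the model's chat template and move it to the model's device
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

outputs = model.generate(input_ids=input_ids, max_new_tokens=512)

# Decode only the newly generated tokens, skipping the prompt
print(tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True))
```

If the adapter is only needed for inference, `model.merge_and_unload()` (PEFT's API for folding LoRA weights into the base model) yields a plain `transformers` model that generates without PEFT at runtime.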