pglo committed on
Commit 2394318
1 Parent(s): c99d600

Upload folder using huggingface_hub

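The commit message above refers to huggingface_hub's folder-upload API. The snippet below is a minimal sketch of how a commit like this one is typically produced; the local folder path and the exact call site are assumptions, not taken from this commit.

```python
# Sketch (assumed workflow): push a local checkpoint directory as a single commit.
# Requires being logged in via `huggingface-cli login` or passing token=... explicitly.
from huggingface_hub import HfApi

api = HfApi()
api.upload_folder(
    folder_path="./Zamba2-7B-instruct",      # assumed local folder holding weights, index, and README
    repo_id="Zyphra/Zamba2-7B-instruct",     # target model repo
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)
```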
README.md CHANGED
@@ -30,8 +30,8 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 
 # Instantiate model and tokenizer
-tokenizer = AutoTokenizer.from_pretrained("Zyphra/Zamba2-7B-Instruct")
-model = AutoModelForCausalLM.from_pretrained("Zyphra/Zamba2-7B-Instruct", device_map="cuda", torch_dtype=torch.bfloat16)
+tokenizer = AutoTokenizer.from_pretrained("Zyphra/Zamba2-7B-instruct")
+model = AutoModelForCausalLM.from_pretrained("Zyphra/Zamba2-7B-instruct", device_map="cuda", torch_dtype=torch.bfloat16)
 
 # Format the input as a chat template
 user_turn_1 = "In one season a flower blooms three times. In one year, there is one blooming season. How many times do two flowers bloom in two years? Please include your logic."
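The snippet in this hunk stops at the first user turn. A typical continuation of such a quick-start builds the chat, applies the chat template, and generates a reply; the lines below follow that standard transformers pattern and are a sketch, not text taken from the README.

```python
# Sketch of the usual next steps (standard transformers chat-template flow; not part of the diff).
sample = [{"role": "user", "content": user_turn_1}]
input_ids = tokenizer.apply_chat_template(sample, add_generation_prompt=True, return_tensors="pt").to(model.device)

outputs = model.generate(input_ids, max_new_tokens=256)
print(tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True))
```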
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4e5e9d53a3156268f039fcb18b65b8ce4b88e7348be51f163c3a7c7e0cb7a1a3
-size 4917529496
+oid sha256:8fb157a960ba1569c04c4d2e141d75b49fa081dd3639534cc2ef1c1d644e0272
+size 4917529456
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:908e9a85c51bd1de0fa3883cba1d5225f2caf579bc5c935e0759a5a366e32341
-size 4968641256
+oid sha256:877a5e24da880f00d15f1a2c190df9883f0fae33139b548bae62012c7cc73f3c
+size 4968641192
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6ad3678f9b8e71d1ad181fe092fa1c1ed91d83319a0342de1ff885399c4e51c3
-size 4934769840
+oid sha256:7d40e600ea6b9b3553b5af4cdde461fca9d0fc4b99d0f08d66488c642215733e
+size 4934769784
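Each shard entry above is a Git LFS pointer: the actual weights live in LFS storage and are identified by the pointer's SHA-256 (`oid`) and byte size. A downloaded shard can be checked against the new pointer values roughly as follows; the local filename is an assumption.

```python
# Sketch: verify a downloaded shard against its LFS pointer (oid/size taken from the diff above).
import hashlib
import os

path = "model-00001-of-00004.safetensors"   # assumed local filename
expected_oid = "8fb157a960ba1569c04c4d2e141d75b49fa081dd3639534cc2ef1c1d644e0272"
expected_size = 4917529456

digest = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):   # hash in 1 MiB chunks
        digest.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert digest.hexdigest() == expected_oid, "sha256 mismatch"
print("shard matches its LFS pointer")
```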
model.safetensors.index.json CHANGED
@@ -93,7 +93,7 @@
- "model.mamba_layers.0.mamba.in_proj.0.weight": "model-00001-of-00004.safetensors",
+ "model.mamba_layers.0.mamba.in_proj.weight": "model-00001-of-00004.safetensors",
@@ -102,7 +102,7 @@
- "model.mamba_layers.1.mamba.in_proj.0.weight": "model-00001-of-00004.safetensors",
+ "model.mamba_layers.1.mamba.in_proj.weight": "model-00001-of-00004.safetensors",
@@ -111,7 +111,7 @@
- "model.mamba_layers.10.mamba.in_proj.0.weight": "model-00001-of-00004.safetensors",
+ "model.mamba_layers.10.mamba.in_proj.weight": "model-00001-of-00004.safetensors",
@@ -120,7 +120,7 @@
- "model.mamba_layers.11.mamba.in_proj.0.weight": "model-00001-of-00004.safetensors",
+ "model.mamba_layers.11.mamba.in_proj.weight": "model-00001-of-00004.safetensors",
@@ -129,7 +129,7 @@
- "model.mamba_layers.12.mamba.in_proj.0.weight": "model-00001-of-00004.safetensors",
+ "model.mamba_layers.12.mamba.in_proj.weight": "model-00001-of-00004.safetensors",
@@ -138,7 +138,7 @@
- "model.mamba_layers.13.mamba.in_proj.0.weight": "model-00001-of-00004.safetensors",
+ "model.mamba_layers.13.mamba.in_proj.weight": "model-00001-of-00004.safetensors",
@@ -147,7 +147,7 @@
- "model.mamba_layers.14.mamba.in_proj.0.weight": "model-00001-of-00004.safetensors",
+ "model.mamba_layers.14.mamba.in_proj.weight": "model-00001-of-00004.safetensors",
@@ -156,7 +156,7 @@
- "model.mamba_layers.15.mamba.in_proj.0.weight": "model-00001-of-00004.safetensors",
+ "model.mamba_layers.15.mamba.in_proj.weight": "model-00001-of-00004.safetensors",
@@ -165,7 +165,7 @@
- "model.mamba_layers.16.mamba.in_proj.0.weight": "model-00001-of-00004.safetensors",
+ "model.mamba_layers.16.mamba.in_proj.weight": "model-00001-of-00004.safetensors",
@@ -174,7 +174,7 @@
- "model.mamba_layers.17.mamba.in_proj.0.weight": "model-00001-of-00004.safetensors",
+ "model.mamba_layers.17.mamba.in_proj.weight": "model-00001-of-00004.safetensors",
@@ -183,7 +183,7 @@
- "model.mamba_layers.18.mamba.in_proj.0.weight": "model-00001-of-00004.safetensors",
+ "model.mamba_layers.18.mamba.in_proj.weight": "model-00001-of-00004.safetensors",
@@ -192,7 +192,7 @@
- "model.mamba_layers.19.mamba.in_proj.0.weight": "model-00001-of-00004.safetensors",
+ "model.mamba_layers.19.mamba.in_proj.weight": "model-00001-of-00004.safetensors",
@@ -201,7 +201,7 @@
- "model.mamba_layers.2.mamba.in_proj.0.weight": "model-00001-of-00004.safetensors",
+ "model.mamba_layers.2.mamba.in_proj.weight": "model-00001-of-00004.safetensors",
@@ -210,7 +210,7 @@
- "model.mamba_layers.20.mamba.in_proj.0.weight": "model-00002-of-00004.safetensors",
+ "model.mamba_layers.20.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
@@ -219,7 +219,7 @@
- "model.mamba_layers.21.mamba.in_proj.0.weight": "model-00002-of-00004.safetensors",
+ "model.mamba_layers.21.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
@@ -228,7 +228,7 @@
- "model.mamba_layers.22.mamba.in_proj.0.weight": "model-00002-of-00004.safetensors",
+ "model.mamba_layers.22.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
@@ -237,7 +237,7 @@
- "model.mamba_layers.23.mamba.in_proj.0.weight": "model-00002-of-00004.safetensors",
+ "model.mamba_layers.23.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
@@ -246,7 +246,7 @@
- "model.mamba_layers.24.mamba.in_proj.0.weight": "model-00002-of-00004.safetensors",
+ "model.mamba_layers.24.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
@@ -255,7 +255,7 @@
- "model.mamba_layers.25.mamba.in_proj.0.weight": "model-00002-of-00004.safetensors",
+ "model.mamba_layers.25.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
@@ -264,7 +264,7 @@
- "model.mamba_layers.26.mamba.in_proj.0.weight": "model-00002-of-00004.safetensors",
+ "model.mamba_layers.26.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
@@ -273,7 +273,7 @@
- "model.mamba_layers.27.mamba.in_proj.0.weight": "model-00002-of-00004.safetensors",
+ "model.mamba_layers.27.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
@@ -282,7 +282,7 @@
- "model.mamba_layers.28.mamba.in_proj.0.weight": "model-00002-of-00004.safetensors",
+ "model.mamba_layers.28.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
@@ -291,7 +291,7 @@
- "model.mamba_layers.29.mamba.in_proj.0.weight": "model-00002-of-00004.safetensors",
+ "model.mamba_layers.29.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
@@ -300,7 +300,7 @@
- "model.mamba_layers.3.mamba.in_proj.0.weight": "model-00001-of-00004.safetensors",
+ "model.mamba_layers.3.mamba.in_proj.weight": "model-00001-of-00004.safetensors",
@@ -309,7 +309,7 @@
- "model.mamba_layers.30.mamba.in_proj.0.weight": "model-00002-of-00004.safetensors",
+ "model.mamba_layers.30.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
@@ -318,7 +318,7 @@
- "model.mamba_layers.31.mamba.in_proj.0.weight": "model-00002-of-00004.safetensors",
+ "model.mamba_layers.31.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
@@ -327,7 +327,7 @@
- "model.mamba_layers.32.mamba.in_proj.0.weight": "model-00002-of-00004.safetensors",
+ "model.mamba_layers.32.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
@@ -336,7 +336,7 @@
- "model.mamba_layers.33.mamba.in_proj.0.weight": "model-00002-of-00004.safetensors",
+ "model.mamba_layers.33.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
@@ -345,7 +345,7 @@
- "model.mamba_layers.34.mamba.in_proj.0.weight": "model-00002-of-00004.safetensors",
+ "model.mamba_layers.34.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
@@ -354,7 +354,7 @@
- "model.mamba_layers.35.mamba.in_proj.0.weight": "model-00002-of-00004.safetensors",
+ "model.mamba_layers.35.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
@@ -363,7 +363,7 @@
- "model.mamba_layers.36.mamba.in_proj.0.weight": "model-00002-of-00004.safetensors",
+ "model.mamba_layers.36.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
@@ -372,7 +372,7 @@
- "model.mamba_layers.37.mamba.in_proj.0.weight": "model-00002-of-00004.safetensors",
+ "model.mamba_layers.37.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
@@ -381,7 +381,7 @@
- "model.mamba_layers.38.mamba.in_proj.0.weight": "model-00002-of-00004.safetensors",
+ "model.mamba_layers.38.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
@@ -390,7 +390,7 @@
- "model.mamba_layers.39.mamba.in_proj.0.weight": "model-00002-of-00004.safetensors",
+ "model.mamba_layers.39.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
@@ -399,7 +399,7 @@
- "model.mamba_layers.4.mamba.in_proj.0.weight": "model-00001-of-00004.safetensors",
+ "model.mamba_layers.4.mamba.in_proj.weight": "model-00001-of-00004.safetensors",
@@ -408,7 +408,7 @@
- "model.mamba_layers.40.mamba.in_proj.0.weight": "model-00002-of-00004.safetensors",
+ "model.mamba_layers.40.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
@@ -417,7 +417,7 @@
- "model.mamba_layers.41.mamba.in_proj.0.weight": "model-00002-of-00004.safetensors",
+ "model.mamba_layers.41.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
@@ -426,7 +426,7 @@
- "model.mamba_layers.42.mamba.in_proj.0.weight": "model-00002-of-00004.safetensors",
+ "model.mamba_layers.42.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
@@ -435,7 +435,7 @@
- "model.mamba_layers.43.mamba.in_proj.0.weight": "model-00002-of-00004.safetensors",
+ "model.mamba_layers.43.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
@@ -444,7 +444,7 @@
- "model.mamba_layers.44.mamba.in_proj.0.weight": "model-00002-of-00004.safetensors",
+ "model.mamba_layers.44.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
@@ -453,7 +453,7 @@
- "model.mamba_layers.45.mamba.in_proj.0.weight": "model-00002-of-00004.safetensors",
+ "model.mamba_layers.45.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
@@ -462,7 +462,7 @@
- "model.mamba_layers.46.mamba.in_proj.0.weight": "model-00002-of-00004.safetensors",
+ "model.mamba_layers.46.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
@@ -471,7 +471,7 @@
- "model.mamba_layers.47.mamba.in_proj.0.weight": "model-00002-of-00004.safetensors",
+ "model.mamba_layers.47.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
@@ -480,7 +480,7 @@
- "model.mamba_layers.48.mamba.in_proj.0.weight": "model-00002-of-00004.safetensors",
+ "model.mamba_layers.48.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
@@ -489,7 +489,7 @@
- "model.mamba_layers.49.mamba.in_proj.0.weight": "model-00002-of-00004.safetensors",
+ "model.mamba_layers.49.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
@@ -498,7 +498,7 @@
- "model.mamba_layers.5.mamba.in_proj.0.weight": "model-00001-of-00004.safetensors",
+ "model.mamba_layers.5.mamba.in_proj.weight": "model-00001-of-00004.safetensors",
@@ -507,7 +507,7 @@
- "model.mamba_layers.50.mamba.in_proj.0.weight": "model-00002-of-00004.safetensors",
+ "model.mamba_layers.50.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
@@ -516,7 +516,7 @@
- "model.mamba_layers.51.mamba.in_proj.0.weight": "model-00002-of-00004.safetensors",
+ "model.mamba_layers.51.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
@@ -525,7 +525,7 @@
- "model.mamba_layers.52.mamba.in_proj.0.weight": "model-00003-of-00004.safetensors",
+ "model.mamba_layers.52.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
@@ -534,7 +534,7 @@
- "model.mamba_layers.53.mamba.in_proj.0.weight": "model-00003-of-00004.safetensors",
+ "model.mamba_layers.53.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
@@ -543,7 +543,7 @@
- "model.mamba_layers.54.mamba.in_proj.0.weight": "model-00003-of-00004.safetensors",
+ "model.mamba_layers.54.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
@@ -552,7 +552,7 @@
- "model.mamba_layers.55.mamba.in_proj.0.weight": "model-00003-of-00004.safetensors",
+ "model.mamba_layers.55.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
@@ -561,7 +561,7 @@
- "model.mamba_layers.56.mamba.in_proj.0.weight": "model-00003-of-00004.safetensors",
+ "model.mamba_layers.56.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
@@ -570,7 +570,7 @@
- "model.mamba_layers.57.mamba.in_proj.0.weight": "model-00003-of-00004.safetensors",
+ "model.mamba_layers.57.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
@@ -579,7 +579,7 @@
- "model.mamba_layers.58.mamba.in_proj.0.weight": "model-00003-of-00004.safetensors",
+ "model.mamba_layers.58.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
@@ -588,7 +588,7 @@
- "model.mamba_layers.59.mamba.in_proj.0.weight": "model-00003-of-00004.safetensors",
+ "model.mamba_layers.59.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
@@ -597,7 +597,7 @@
- "model.mamba_layers.6.mamba.in_proj.0.weight": "model-00001-of-00004.safetensors",
+ "model.mamba_layers.6.mamba.in_proj.weight": "model-00001-of-00004.safetensors",
@@ -606,7 +606,7 @@
- "model.mamba_layers.60.mamba.in_proj.0.weight": "model-00003-of-00004.safetensors",
+ "model.mamba_layers.60.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
@@ -615,7 +615,7 @@
- "model.mamba_layers.61.mamba.in_proj.0.weight": "model-00003-of-00004.safetensors",
+ "model.mamba_layers.61.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
@@ -624,7 +624,7 @@
- "model.mamba_layers.62.mamba.in_proj.0.weight": "model-00003-of-00004.safetensors",
+ "model.mamba_layers.62.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
@@ -633,7 +633,7 @@
- "model.mamba_layers.63.mamba.in_proj.0.weight": "model-00003-of-00004.safetensors",
+ "model.mamba_layers.63.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
@@ -642,7 +642,7 @@
- "model.mamba_layers.64.mamba.in_proj.0.weight": "model-00003-of-00004.safetensors",
+ "model.mamba_layers.64.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
@@ -651,7 +651,7 @@
- "model.mamba_layers.65.mamba.in_proj.0.weight": "model-00003-of-00004.safetensors",
+ "model.mamba_layers.65.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
@@ -660,7 +660,7 @@
- "model.mamba_layers.66.mamba.in_proj.0.weight": "model-00003-of-00004.safetensors",
+ "model.mamba_layers.66.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
@@ -669,7 +669,7 @@
- "model.mamba_layers.67.mamba.in_proj.0.weight": "model-00003-of-00004.safetensors",
+ "model.mamba_layers.67.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
@@ -678,7 +678,7 @@
- "model.mamba_layers.68.mamba.in_proj.0.weight": "model-00003-of-00004.safetensors",
+ "model.mamba_layers.68.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
@@ -687,7 +687,7 @@
- "model.mamba_layers.69.mamba.in_proj.0.weight": "model-00003-of-00004.safetensors",
+ "model.mamba_layers.69.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
@@ -696,7 +696,7 @@
- "model.mamba_layers.7.mamba.in_proj.0.weight": "model-00001-of-00004.safetensors",
+ "model.mamba_layers.7.mamba.in_proj.weight": "model-00001-of-00004.safetensors",
@@ -705,7 +705,7 @@
- "model.mamba_layers.70.mamba.in_proj.0.weight": "model-00003-of-00004.safetensors",
+ "model.mamba_layers.70.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
@@ -714,7 +714,7 @@
- "model.mamba_layers.71.mamba.in_proj.0.weight": "model-00003-of-00004.safetensors",
+ "model.mamba_layers.71.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
@@ -723,7 +723,7 @@
- "model.mamba_layers.72.mamba.in_proj.0.weight": "model-00003-of-00004.safetensors",
+ "model.mamba_layers.72.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
@@ -732,7 +732,7 @@
- "model.mamba_layers.73.mamba.in_proj.0.weight": "model-00003-of-00004.safetensors",
+ "model.mamba_layers.73.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
@@ -741,7 +741,7 @@
- "model.mamba_layers.74.mamba.in_proj.0.weight": "model-00003-of-00004.safetensors",
+ "model.mamba_layers.74.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
@@ -750,7 +750,7 @@
- "model.mamba_layers.75.mamba.in_proj.0.weight": "model-00003-of-00004.safetensors",
+ "model.mamba_layers.75.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
@@ -759,7 +759,7 @@
- "model.mamba_layers.76.mamba.in_proj.0.weight": "model-00003-of-00004.safetensors",
+ "model.mamba_layers.76.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
@@ -768,7 +768,7 @@
- "model.mamba_layers.77.mamba.in_proj.0.weight": "model-00003-of-00004.safetensors",
+ "model.mamba_layers.77.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
@@ -777,7 +777,7 @@
- "model.mamba_layers.78.mamba.in_proj.0.weight": "model-00003-of-00004.safetensors",
+ "model.mamba_layers.78.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
@@ -786,7 +786,7 @@
- "model.mamba_layers.79.mamba.in_proj.0.weight": "model-00003-of-00004.safetensors",
+ "model.mamba_layers.79.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
@@ -795,7 +795,7 @@
- "model.mamba_layers.8.mamba.in_proj.0.weight": "model-00001-of-00004.safetensors",
+ "model.mamba_layers.8.mamba.in_proj.weight": "model-00001-of-00004.safetensors",
@@ -804,7 +804,7 @@
- "model.mamba_layers.80.mamba.in_proj.0.weight": "model-00003-of-00004.safetensors",
+ "model.mamba_layers.80.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
@@ -813,8 +813,8 @@
- "model.mamba_layers.9.mamba.in_proj.0.weight": "model-00001-of-00004.safetensors",
+ "model.mamba_layers.9.mamba.in_proj.weight": "model-00001-of-00004.safetensors",
- }
+ }
205
  "model.mamba_layers.2.mamba.norm.weight": "model-00001-of-00004.safetensors",
206
  "model.mamba_layers.2.mamba.out_proj.weight": "model-00001-of-00004.safetensors",
207
  "model.mamba_layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors",
 
210
  "model.mamba_layers.20.mamba.conv1d.bias": "model-00002-of-00004.safetensors",
211
  "model.mamba_layers.20.mamba.conv1d.weight": "model-00002-of-00004.safetensors",
212
  "model.mamba_layers.20.mamba.dt_bias": "model-00001-of-00004.safetensors",
213
+ "model.mamba_layers.20.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
214
  "model.mamba_layers.20.mamba.norm.weight": "model-00002-of-00004.safetensors",
215
  "model.mamba_layers.20.mamba.out_proj.weight": "model-00002-of-00004.safetensors",
216
  "model.mamba_layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors",
 
219
  "model.mamba_layers.21.mamba.conv1d.bias": "model-00002-of-00004.safetensors",
220
  "model.mamba_layers.21.mamba.conv1d.weight": "model-00002-of-00004.safetensors",
221
  "model.mamba_layers.21.mamba.dt_bias": "model-00002-of-00004.safetensors",
222
+ "model.mamba_layers.21.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
223
  "model.mamba_layers.21.mamba.norm.weight": "model-00002-of-00004.safetensors",
224
  "model.mamba_layers.21.mamba.out_proj.weight": "model-00002-of-00004.safetensors",
225
  "model.mamba_layers.22.input_layernorm.weight": "model-00002-of-00004.safetensors",
 
228
  "model.mamba_layers.22.mamba.conv1d.bias": "model-00002-of-00004.safetensors",
229
  "model.mamba_layers.22.mamba.conv1d.weight": "model-00002-of-00004.safetensors",
230
  "model.mamba_layers.22.mamba.dt_bias": "model-00002-of-00004.safetensors",
231
+ "model.mamba_layers.22.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
232
  "model.mamba_layers.22.mamba.norm.weight": "model-00002-of-00004.safetensors",
233
  "model.mamba_layers.22.mamba.out_proj.weight": "model-00002-of-00004.safetensors",
234
  "model.mamba_layers.23.input_layernorm.weight": "model-00002-of-00004.safetensors",
 
237
  "model.mamba_layers.23.mamba.conv1d.bias": "model-00002-of-00004.safetensors",
238
  "model.mamba_layers.23.mamba.conv1d.weight": "model-00002-of-00004.safetensors",
239
  "model.mamba_layers.23.mamba.dt_bias": "model-00002-of-00004.safetensors",
240
+ "model.mamba_layers.23.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
241
  "model.mamba_layers.23.mamba.norm.weight": "model-00002-of-00004.safetensors",
242
  "model.mamba_layers.23.mamba.out_proj.weight": "model-00002-of-00004.safetensors",
243
  "model.mamba_layers.24.input_layernorm.weight": "model-00002-of-00004.safetensors",
 
246
  "model.mamba_layers.24.mamba.conv1d.bias": "model-00002-of-00004.safetensors",
247
  "model.mamba_layers.24.mamba.conv1d.weight": "model-00002-of-00004.safetensors",
248
  "model.mamba_layers.24.mamba.dt_bias": "model-00002-of-00004.safetensors",
249
+ "model.mamba_layers.24.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
250
  "model.mamba_layers.24.mamba.norm.weight": "model-00002-of-00004.safetensors",
251
  "model.mamba_layers.24.mamba.out_proj.weight": "model-00002-of-00004.safetensors",
252
  "model.mamba_layers.25.input_layernorm.weight": "model-00002-of-00004.safetensors",
 
255
  "model.mamba_layers.25.mamba.conv1d.bias": "model-00002-of-00004.safetensors",
256
  "model.mamba_layers.25.mamba.conv1d.weight": "model-00002-of-00004.safetensors",
257
  "model.mamba_layers.25.mamba.dt_bias": "model-00002-of-00004.safetensors",
258
+ "model.mamba_layers.25.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
259
  "model.mamba_layers.25.mamba.norm.weight": "model-00002-of-00004.safetensors",
260
  "model.mamba_layers.25.mamba.out_proj.weight": "model-00002-of-00004.safetensors",
261
  "model.mamba_layers.26.input_layernorm.weight": "model-00002-of-00004.safetensors",
 
264
  "model.mamba_layers.26.mamba.conv1d.bias": "model-00002-of-00004.safetensors",
265
  "model.mamba_layers.26.mamba.conv1d.weight": "model-00002-of-00004.safetensors",
266
  "model.mamba_layers.26.mamba.dt_bias": "model-00002-of-00004.safetensors",
267
+ "model.mamba_layers.26.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
268
  "model.mamba_layers.26.mamba.norm.weight": "model-00002-of-00004.safetensors",
269
  "model.mamba_layers.26.mamba.out_proj.weight": "model-00002-of-00004.safetensors",
270
  "model.mamba_layers.27.input_layernorm.weight": "model-00002-of-00004.safetensors",
 
273
  "model.mamba_layers.27.mamba.conv1d.bias": "model-00002-of-00004.safetensors",
274
  "model.mamba_layers.27.mamba.conv1d.weight": "model-00002-of-00004.safetensors",
275
  "model.mamba_layers.27.mamba.dt_bias": "model-00002-of-00004.safetensors",
276
+ "model.mamba_layers.27.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
277
  "model.mamba_layers.27.mamba.norm.weight": "model-00002-of-00004.safetensors",
278
  "model.mamba_layers.27.mamba.out_proj.weight": "model-00002-of-00004.safetensors",
279
  "model.mamba_layers.28.input_layernorm.weight": "model-00002-of-00004.safetensors",
 
282
  "model.mamba_layers.28.mamba.conv1d.bias": "model-00002-of-00004.safetensors",
283
  "model.mamba_layers.28.mamba.conv1d.weight": "model-00002-of-00004.safetensors",
284
  "model.mamba_layers.28.mamba.dt_bias": "model-00002-of-00004.safetensors",
285
+ "model.mamba_layers.28.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
286
  "model.mamba_layers.28.mamba.norm.weight": "model-00002-of-00004.safetensors",
287
  "model.mamba_layers.28.mamba.out_proj.weight": "model-00002-of-00004.safetensors",
288
  "model.mamba_layers.29.input_layernorm.weight": "model-00002-of-00004.safetensors",
 
291
  "model.mamba_layers.29.mamba.conv1d.bias": "model-00002-of-00004.safetensors",
292
  "model.mamba_layers.29.mamba.conv1d.weight": "model-00002-of-00004.safetensors",
293
  "model.mamba_layers.29.mamba.dt_bias": "model-00002-of-00004.safetensors",
294
+ "model.mamba_layers.29.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
295
  "model.mamba_layers.29.mamba.norm.weight": "model-00002-of-00004.safetensors",
296
  "model.mamba_layers.29.mamba.out_proj.weight": "model-00002-of-00004.safetensors",
297
  "model.mamba_layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
 
300
  "model.mamba_layers.3.mamba.conv1d.bias": "model-00001-of-00004.safetensors",
301
  "model.mamba_layers.3.mamba.conv1d.weight": "model-00001-of-00004.safetensors",
302
  "model.mamba_layers.3.mamba.dt_bias": "model-00001-of-00004.safetensors",
303
+ "model.mamba_layers.3.mamba.in_proj.weight": "model-00001-of-00004.safetensors",
304
  "model.mamba_layers.3.mamba.norm.weight": "model-00001-of-00004.safetensors",
305
  "model.mamba_layers.3.mamba.out_proj.weight": "model-00001-of-00004.safetensors",
306
  "model.mamba_layers.30.input_layernorm.weight": "model-00002-of-00004.safetensors",
 
309
  "model.mamba_layers.30.mamba.conv1d.bias": "model-00002-of-00004.safetensors",
310
  "model.mamba_layers.30.mamba.conv1d.weight": "model-00002-of-00004.safetensors",
311
  "model.mamba_layers.30.mamba.dt_bias": "model-00002-of-00004.safetensors",
312
+ "model.mamba_layers.30.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
313
  "model.mamba_layers.30.mamba.norm.weight": "model-00002-of-00004.safetensors",
314
  "model.mamba_layers.30.mamba.out_proj.weight": "model-00002-of-00004.safetensors",
315
  "model.mamba_layers.31.input_layernorm.weight": "model-00002-of-00004.safetensors",
 
318
  "model.mamba_layers.31.mamba.conv1d.bias": "model-00002-of-00004.safetensors",
319
  "model.mamba_layers.31.mamba.conv1d.weight": "model-00002-of-00004.safetensors",
320
  "model.mamba_layers.31.mamba.dt_bias": "model-00002-of-00004.safetensors",
321
+ "model.mamba_layers.31.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
322
  "model.mamba_layers.31.mamba.norm.weight": "model-00002-of-00004.safetensors",
323
  "model.mamba_layers.31.mamba.out_proj.weight": "model-00002-of-00004.safetensors",
324
  "model.mamba_layers.32.input_layernorm.weight": "model-00002-of-00004.safetensors",
 
327
  "model.mamba_layers.32.mamba.conv1d.bias": "model-00002-of-00004.safetensors",
328
  "model.mamba_layers.32.mamba.conv1d.weight": "model-00002-of-00004.safetensors",
329
  "model.mamba_layers.32.mamba.dt_bias": "model-00002-of-00004.safetensors",
330
+ "model.mamba_layers.32.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
331
  "model.mamba_layers.32.mamba.norm.weight": "model-00002-of-00004.safetensors",
332
  "model.mamba_layers.32.mamba.out_proj.weight": "model-00002-of-00004.safetensors",
333
  "model.mamba_layers.33.input_layernorm.weight": "model-00002-of-00004.safetensors",
 
336
  "model.mamba_layers.33.mamba.conv1d.bias": "model-00002-of-00004.safetensors",
337
  "model.mamba_layers.33.mamba.conv1d.weight": "model-00002-of-00004.safetensors",
338
  "model.mamba_layers.33.mamba.dt_bias": "model-00002-of-00004.safetensors",
339
+ "model.mamba_layers.33.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
340
  "model.mamba_layers.33.mamba.norm.weight": "model-00002-of-00004.safetensors",
341
  "model.mamba_layers.33.mamba.out_proj.weight": "model-00002-of-00004.safetensors",
342
  "model.mamba_layers.34.input_layernorm.weight": "model-00002-of-00004.safetensors",
 
345
  "model.mamba_layers.34.mamba.conv1d.bias": "model-00002-of-00004.safetensors",
346
  "model.mamba_layers.34.mamba.conv1d.weight": "model-00002-of-00004.safetensors",
347
  "model.mamba_layers.34.mamba.dt_bias": "model-00002-of-00004.safetensors",
348
+ "model.mamba_layers.34.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
349
  "model.mamba_layers.34.mamba.norm.weight": "model-00002-of-00004.safetensors",
350
  "model.mamba_layers.34.mamba.out_proj.weight": "model-00002-of-00004.safetensors",
351
  "model.mamba_layers.35.input_layernorm.weight": "model-00002-of-00004.safetensors",
 
354
  "model.mamba_layers.35.mamba.conv1d.bias": "model-00002-of-00004.safetensors",
355
  "model.mamba_layers.35.mamba.conv1d.weight": "model-00002-of-00004.safetensors",
356
  "model.mamba_layers.35.mamba.dt_bias": "model-00002-of-00004.safetensors",
357
+ "model.mamba_layers.35.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
358
  "model.mamba_layers.35.mamba.norm.weight": "model-00002-of-00004.safetensors",
359
  "model.mamba_layers.35.mamba.out_proj.weight": "model-00002-of-00004.safetensors",
360
  "model.mamba_layers.36.input_layernorm.weight": "model-00002-of-00004.safetensors",
 
363
  "model.mamba_layers.36.mamba.conv1d.bias": "model-00002-of-00004.safetensors",
364
  "model.mamba_layers.36.mamba.conv1d.weight": "model-00002-of-00004.safetensors",
365
  "model.mamba_layers.36.mamba.dt_bias": "model-00002-of-00004.safetensors",
366
+ "model.mamba_layers.36.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
367
  "model.mamba_layers.36.mamba.norm.weight": "model-00002-of-00004.safetensors",
368
  "model.mamba_layers.36.mamba.out_proj.weight": "model-00002-of-00004.safetensors",
369
  "model.mamba_layers.37.input_layernorm.weight": "model-00002-of-00004.safetensors",
 
372
  "model.mamba_layers.37.mamba.conv1d.bias": "model-00002-of-00004.safetensors",
373
  "model.mamba_layers.37.mamba.conv1d.weight": "model-00002-of-00004.safetensors",
374
  "model.mamba_layers.37.mamba.dt_bias": "model-00002-of-00004.safetensors",
375
+ "model.mamba_layers.37.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
376
  "model.mamba_layers.37.mamba.norm.weight": "model-00002-of-00004.safetensors",
377
  "model.mamba_layers.37.mamba.out_proj.weight": "model-00002-of-00004.safetensors",
378
  "model.mamba_layers.38.input_layernorm.weight": "model-00002-of-00004.safetensors",
 
381
  "model.mamba_layers.38.mamba.conv1d.bias": "model-00002-of-00004.safetensors",
382
  "model.mamba_layers.38.mamba.conv1d.weight": "model-00002-of-00004.safetensors",
383
  "model.mamba_layers.38.mamba.dt_bias": "model-00002-of-00004.safetensors",
384
+ "model.mamba_layers.38.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
385
  "model.mamba_layers.38.mamba.norm.weight": "model-00002-of-00004.safetensors",
386
  "model.mamba_layers.38.mamba.out_proj.weight": "model-00002-of-00004.safetensors",
387
  "model.mamba_layers.39.input_layernorm.weight": "model-00002-of-00004.safetensors",
 
390
  "model.mamba_layers.39.mamba.conv1d.bias": "model-00002-of-00004.safetensors",
391
  "model.mamba_layers.39.mamba.conv1d.weight": "model-00002-of-00004.safetensors",
392
  "model.mamba_layers.39.mamba.dt_bias": "model-00002-of-00004.safetensors",
393
+ "model.mamba_layers.39.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
394
  "model.mamba_layers.39.mamba.norm.weight": "model-00002-of-00004.safetensors",
395
  "model.mamba_layers.39.mamba.out_proj.weight": "model-00002-of-00004.safetensors",
396
  "model.mamba_layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
 
399
  "model.mamba_layers.4.mamba.conv1d.bias": "model-00001-of-00004.safetensors",
400
  "model.mamba_layers.4.mamba.conv1d.weight": "model-00001-of-00004.safetensors",
401
  "model.mamba_layers.4.mamba.dt_bias": "model-00001-of-00004.safetensors",
402
+ "model.mamba_layers.4.mamba.in_proj.weight": "model-00001-of-00004.safetensors",
403
  "model.mamba_layers.4.mamba.norm.weight": "model-00001-of-00004.safetensors",
404
  "model.mamba_layers.4.mamba.out_proj.weight": "model-00001-of-00004.safetensors",
405
  "model.mamba_layers.40.input_layernorm.weight": "model-00002-of-00004.safetensors",
 
408
  "model.mamba_layers.40.mamba.conv1d.bias": "model-00002-of-00004.safetensors",
409
  "model.mamba_layers.40.mamba.conv1d.weight": "model-00002-of-00004.safetensors",
410
  "model.mamba_layers.40.mamba.dt_bias": "model-00002-of-00004.safetensors",
411
+ "model.mamba_layers.40.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
412
  "model.mamba_layers.40.mamba.norm.weight": "model-00002-of-00004.safetensors",
413
  "model.mamba_layers.40.mamba.out_proj.weight": "model-00002-of-00004.safetensors",
414
  "model.mamba_layers.41.input_layernorm.weight": "model-00002-of-00004.safetensors",
 
417
  "model.mamba_layers.41.mamba.conv1d.bias": "model-00002-of-00004.safetensors",
418
  "model.mamba_layers.41.mamba.conv1d.weight": "model-00002-of-00004.safetensors",
419
  "model.mamba_layers.41.mamba.dt_bias": "model-00002-of-00004.safetensors",
420
+ "model.mamba_layers.41.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
421
  "model.mamba_layers.41.mamba.norm.weight": "model-00002-of-00004.safetensors",
422
  "model.mamba_layers.41.mamba.out_proj.weight": "model-00002-of-00004.safetensors",
423
  "model.mamba_layers.42.input_layernorm.weight": "model-00002-of-00004.safetensors",
 
426
  "model.mamba_layers.42.mamba.conv1d.bias": "model-00002-of-00004.safetensors",
427
  "model.mamba_layers.42.mamba.conv1d.weight": "model-00002-of-00004.safetensors",
428
  "model.mamba_layers.42.mamba.dt_bias": "model-00002-of-00004.safetensors",
429
+ "model.mamba_layers.42.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
430
  "model.mamba_layers.42.mamba.norm.weight": "model-00002-of-00004.safetensors",
431
  "model.mamba_layers.42.mamba.out_proj.weight": "model-00002-of-00004.safetensors",
432
  "model.mamba_layers.43.input_layernorm.weight": "model-00002-of-00004.safetensors",
 
435
  "model.mamba_layers.43.mamba.conv1d.bias": "model-00002-of-00004.safetensors",
436
  "model.mamba_layers.43.mamba.conv1d.weight": "model-00002-of-00004.safetensors",
437
  "model.mamba_layers.43.mamba.dt_bias": "model-00002-of-00004.safetensors",
438
+ "model.mamba_layers.43.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
439
  "model.mamba_layers.43.mamba.norm.weight": "model-00002-of-00004.safetensors",
440
  "model.mamba_layers.43.mamba.out_proj.weight": "model-00002-of-00004.safetensors",
441
  "model.mamba_layers.44.input_layernorm.weight": "model-00002-of-00004.safetensors",
 
444
  "model.mamba_layers.44.mamba.conv1d.bias": "model-00002-of-00004.safetensors",
445
  "model.mamba_layers.44.mamba.conv1d.weight": "model-00002-of-00004.safetensors",
446
  "model.mamba_layers.44.mamba.dt_bias": "model-00002-of-00004.safetensors",
447
+ "model.mamba_layers.44.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
448
  "model.mamba_layers.44.mamba.norm.weight": "model-00002-of-00004.safetensors",
449
  "model.mamba_layers.44.mamba.out_proj.weight": "model-00002-of-00004.safetensors",
450
  "model.mamba_layers.45.input_layernorm.weight": "model-00002-of-00004.safetensors",
 
453
  "model.mamba_layers.45.mamba.conv1d.bias": "model-00002-of-00004.safetensors",
454
  "model.mamba_layers.45.mamba.conv1d.weight": "model-00002-of-00004.safetensors",
455
  "model.mamba_layers.45.mamba.dt_bias": "model-00002-of-00004.safetensors",
456
+ "model.mamba_layers.45.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
457
  "model.mamba_layers.45.mamba.norm.weight": "model-00002-of-00004.safetensors",
458
  "model.mamba_layers.45.mamba.out_proj.weight": "model-00002-of-00004.safetensors",
459
  "model.mamba_layers.46.input_layernorm.weight": "model-00002-of-00004.safetensors",
 
462
  "model.mamba_layers.46.mamba.conv1d.bias": "model-00002-of-00004.safetensors",
463
  "model.mamba_layers.46.mamba.conv1d.weight": "model-00002-of-00004.safetensors",
464
  "model.mamba_layers.46.mamba.dt_bias": "model-00002-of-00004.safetensors",
465
+ "model.mamba_layers.46.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
466
  "model.mamba_layers.46.mamba.norm.weight": "model-00002-of-00004.safetensors",
467
  "model.mamba_layers.46.mamba.out_proj.weight": "model-00002-of-00004.safetensors",
468
  "model.mamba_layers.47.input_layernorm.weight": "model-00002-of-00004.safetensors",
 
471
  "model.mamba_layers.47.mamba.conv1d.bias": "model-00002-of-00004.safetensors",
472
  "model.mamba_layers.47.mamba.conv1d.weight": "model-00002-of-00004.safetensors",
473
  "model.mamba_layers.47.mamba.dt_bias": "model-00002-of-00004.safetensors",
474
+ "model.mamba_layers.47.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
475
  "model.mamba_layers.47.mamba.norm.weight": "model-00002-of-00004.safetensors",
476
  "model.mamba_layers.47.mamba.out_proj.weight": "model-00002-of-00004.safetensors",
477
  "model.mamba_layers.48.input_layernorm.weight": "model-00002-of-00004.safetensors",
 
480
  "model.mamba_layers.48.mamba.conv1d.bias": "model-00002-of-00004.safetensors",
481
  "model.mamba_layers.48.mamba.conv1d.weight": "model-00002-of-00004.safetensors",
482
  "model.mamba_layers.48.mamba.dt_bias": "model-00002-of-00004.safetensors",
483
+ "model.mamba_layers.48.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
484
  "model.mamba_layers.48.mamba.norm.weight": "model-00002-of-00004.safetensors",
485
  "model.mamba_layers.48.mamba.out_proj.weight": "model-00002-of-00004.safetensors",
486
  "model.mamba_layers.49.input_layernorm.weight": "model-00002-of-00004.safetensors",
 
489
  "model.mamba_layers.49.mamba.conv1d.bias": "model-00002-of-00004.safetensors",
490
  "model.mamba_layers.49.mamba.conv1d.weight": "model-00002-of-00004.safetensors",
491
  "model.mamba_layers.49.mamba.dt_bias": "model-00002-of-00004.safetensors",
492
+ "model.mamba_layers.49.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
493
  "model.mamba_layers.49.mamba.norm.weight": "model-00002-of-00004.safetensors",
494
  "model.mamba_layers.49.mamba.out_proj.weight": "model-00002-of-00004.safetensors",
495
  "model.mamba_layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
 
498
  "model.mamba_layers.5.mamba.conv1d.bias": "model-00001-of-00004.safetensors",
499
  "model.mamba_layers.5.mamba.conv1d.weight": "model-00001-of-00004.safetensors",
500
  "model.mamba_layers.5.mamba.dt_bias": "model-00001-of-00004.safetensors",
501
+ "model.mamba_layers.5.mamba.in_proj.weight": "model-00001-of-00004.safetensors",
502
  "model.mamba_layers.5.mamba.norm.weight": "model-00001-of-00004.safetensors",
503
  "model.mamba_layers.5.mamba.out_proj.weight": "model-00001-of-00004.safetensors",
504
  "model.mamba_layers.50.input_layernorm.weight": "model-00002-of-00004.safetensors",
 
507
  "model.mamba_layers.50.mamba.conv1d.bias": "model-00002-of-00004.safetensors",
508
  "model.mamba_layers.50.mamba.conv1d.weight": "model-00002-of-00004.safetensors",
509
  "model.mamba_layers.50.mamba.dt_bias": "model-00002-of-00004.safetensors",
510
+ "model.mamba_layers.50.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
511
  "model.mamba_layers.50.mamba.norm.weight": "model-00002-of-00004.safetensors",
512
  "model.mamba_layers.50.mamba.out_proj.weight": "model-00002-of-00004.safetensors",
513
  "model.mamba_layers.51.input_layernorm.weight": "model-00003-of-00004.safetensors",
 
516
  "model.mamba_layers.51.mamba.conv1d.bias": "model-00002-of-00004.safetensors",
517
  "model.mamba_layers.51.mamba.conv1d.weight": "model-00002-of-00004.safetensors",
518
  "model.mamba_layers.51.mamba.dt_bias": "model-00002-of-00004.safetensors",
519
+ "model.mamba_layers.51.mamba.in_proj.weight": "model-00002-of-00004.safetensors",
520
  "model.mamba_layers.51.mamba.norm.weight": "model-00002-of-00004.safetensors",
521
  "model.mamba_layers.51.mamba.out_proj.weight": "model-00003-of-00004.safetensors",
522
  "model.mamba_layers.52.input_layernorm.weight": "model-00003-of-00004.safetensors",
 
525
  "model.mamba_layers.52.mamba.conv1d.bias": "model-00003-of-00004.safetensors",
526
  "model.mamba_layers.52.mamba.conv1d.weight": "model-00003-of-00004.safetensors",
527
  "model.mamba_layers.52.mamba.dt_bias": "model-00003-of-00004.safetensors",
528
+ "model.mamba_layers.52.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
529
  "model.mamba_layers.52.mamba.norm.weight": "model-00003-of-00004.safetensors",
530
  "model.mamba_layers.52.mamba.out_proj.weight": "model-00003-of-00004.safetensors",
531
  "model.mamba_layers.53.input_layernorm.weight": "model-00003-of-00004.safetensors",
 
534
  "model.mamba_layers.53.mamba.conv1d.bias": "model-00003-of-00004.safetensors",
535
  "model.mamba_layers.53.mamba.conv1d.weight": "model-00003-of-00004.safetensors",
536
  "model.mamba_layers.53.mamba.dt_bias": "model-00003-of-00004.safetensors",
537
+ "model.mamba_layers.53.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
538
  "model.mamba_layers.53.mamba.norm.weight": "model-00003-of-00004.safetensors",
539
  "model.mamba_layers.53.mamba.out_proj.weight": "model-00003-of-00004.safetensors",
540
  "model.mamba_layers.54.input_layernorm.weight": "model-00003-of-00004.safetensors",
 
543
  "model.mamba_layers.54.mamba.conv1d.bias": "model-00003-of-00004.safetensors",
544
  "model.mamba_layers.54.mamba.conv1d.weight": "model-00003-of-00004.safetensors",
545
  "model.mamba_layers.54.mamba.dt_bias": "model-00003-of-00004.safetensors",
546
+ "model.mamba_layers.54.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
547
  "model.mamba_layers.54.mamba.norm.weight": "model-00003-of-00004.safetensors",
548
  "model.mamba_layers.54.mamba.out_proj.weight": "model-00003-of-00004.safetensors",
549
  "model.mamba_layers.55.input_layernorm.weight": "model-00003-of-00004.safetensors",
 
552
  "model.mamba_layers.55.mamba.conv1d.bias": "model-00003-of-00004.safetensors",
553
  "model.mamba_layers.55.mamba.conv1d.weight": "model-00003-of-00004.safetensors",
554
  "model.mamba_layers.55.mamba.dt_bias": "model-00003-of-00004.safetensors",
555
+ "model.mamba_layers.55.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
556
  "model.mamba_layers.55.mamba.norm.weight": "model-00003-of-00004.safetensors",
557
  "model.mamba_layers.55.mamba.out_proj.weight": "model-00003-of-00004.safetensors",
558
  "model.mamba_layers.56.input_layernorm.weight": "model-00003-of-00004.safetensors",
 
561
  "model.mamba_layers.56.mamba.conv1d.bias": "model-00003-of-00004.safetensors",
562
  "model.mamba_layers.56.mamba.conv1d.weight": "model-00003-of-00004.safetensors",
563
  "model.mamba_layers.56.mamba.dt_bias": "model-00003-of-00004.safetensors",
564
+ "model.mamba_layers.56.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
565
  "model.mamba_layers.56.mamba.norm.weight": "model-00003-of-00004.safetensors",
566
  "model.mamba_layers.56.mamba.out_proj.weight": "model-00003-of-00004.safetensors",
567
  "model.mamba_layers.57.input_layernorm.weight": "model-00003-of-00004.safetensors",
 
570
  "model.mamba_layers.57.mamba.conv1d.bias": "model-00003-of-00004.safetensors",
571
  "model.mamba_layers.57.mamba.conv1d.weight": "model-00003-of-00004.safetensors",
572
  "model.mamba_layers.57.mamba.dt_bias": "model-00003-of-00004.safetensors",
573
+ "model.mamba_layers.57.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
574
  "model.mamba_layers.57.mamba.norm.weight": "model-00003-of-00004.safetensors",
575
  "model.mamba_layers.57.mamba.out_proj.weight": "model-00003-of-00004.safetensors",
576
  "model.mamba_layers.58.input_layernorm.weight": "model-00003-of-00004.safetensors",
 
579
  "model.mamba_layers.58.mamba.conv1d.bias": "model-00003-of-00004.safetensors",
580
  "model.mamba_layers.58.mamba.conv1d.weight": "model-00003-of-00004.safetensors",
581
  "model.mamba_layers.58.mamba.dt_bias": "model-00003-of-00004.safetensors",
582
+ "model.mamba_layers.58.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
583
  "model.mamba_layers.58.mamba.norm.weight": "model-00003-of-00004.safetensors",
584
  "model.mamba_layers.58.mamba.out_proj.weight": "model-00003-of-00004.safetensors",
585
  "model.mamba_layers.59.input_layernorm.weight": "model-00003-of-00004.safetensors",
 
588
  "model.mamba_layers.59.mamba.conv1d.bias": "model-00003-of-00004.safetensors",
589
  "model.mamba_layers.59.mamba.conv1d.weight": "model-00003-of-00004.safetensors",
590
  "model.mamba_layers.59.mamba.dt_bias": "model-00003-of-00004.safetensors",
591
+ "model.mamba_layers.59.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
592
  "model.mamba_layers.59.mamba.norm.weight": "model-00003-of-00004.safetensors",
593
  "model.mamba_layers.59.mamba.out_proj.weight": "model-00003-of-00004.safetensors",
594
  "model.mamba_layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
 
597
  "model.mamba_layers.6.mamba.conv1d.bias": "model-00001-of-00004.safetensors",
598
  "model.mamba_layers.6.mamba.conv1d.weight": "model-00001-of-00004.safetensors",
599
  "model.mamba_layers.6.mamba.dt_bias": "model-00001-of-00004.safetensors",
600
+ "model.mamba_layers.6.mamba.in_proj.weight": "model-00001-of-00004.safetensors",
601
  "model.mamba_layers.6.mamba.norm.weight": "model-00001-of-00004.safetensors",
602
  "model.mamba_layers.6.mamba.out_proj.weight": "model-00001-of-00004.safetensors",
603
  "model.mamba_layers.60.input_layernorm.weight": "model-00003-of-00004.safetensors",
 
606
  "model.mamba_layers.60.mamba.conv1d.bias": "model-00003-of-00004.safetensors",
607
  "model.mamba_layers.60.mamba.conv1d.weight": "model-00003-of-00004.safetensors",
608
  "model.mamba_layers.60.mamba.dt_bias": "model-00003-of-00004.safetensors",
609
+ "model.mamba_layers.60.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
610
  "model.mamba_layers.60.mamba.norm.weight": "model-00003-of-00004.safetensors",
611
  "model.mamba_layers.60.mamba.out_proj.weight": "model-00003-of-00004.safetensors",
612
  "model.mamba_layers.61.input_layernorm.weight": "model-00003-of-00004.safetensors",
 
615
  "model.mamba_layers.61.mamba.conv1d.bias": "model-00003-of-00004.safetensors",
616
  "model.mamba_layers.61.mamba.conv1d.weight": "model-00003-of-00004.safetensors",
617
  "model.mamba_layers.61.mamba.dt_bias": "model-00003-of-00004.safetensors",
618
+ "model.mamba_layers.61.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
619
  "model.mamba_layers.61.mamba.norm.weight": "model-00003-of-00004.safetensors",
620
  "model.mamba_layers.61.mamba.out_proj.weight": "model-00003-of-00004.safetensors",
621
  "model.mamba_layers.62.input_layernorm.weight": "model-00003-of-00004.safetensors",
 
624
  "model.mamba_layers.62.mamba.conv1d.bias": "model-00003-of-00004.safetensors",
625
  "model.mamba_layers.62.mamba.conv1d.weight": "model-00003-of-00004.safetensors",
626
  "model.mamba_layers.62.mamba.dt_bias": "model-00003-of-00004.safetensors",
627
+ "model.mamba_layers.62.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
628
  "model.mamba_layers.62.mamba.norm.weight": "model-00003-of-00004.safetensors",
629
  "model.mamba_layers.62.mamba.out_proj.weight": "model-00003-of-00004.safetensors",
630
  "model.mamba_layers.63.input_layernorm.weight": "model-00003-of-00004.safetensors",
 
633
  "model.mamba_layers.63.mamba.conv1d.bias": "model-00003-of-00004.safetensors",
634
  "model.mamba_layers.63.mamba.conv1d.weight": "model-00003-of-00004.safetensors",
635
  "model.mamba_layers.63.mamba.dt_bias": "model-00003-of-00004.safetensors",
636
+ "model.mamba_layers.63.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
637
  "model.mamba_layers.63.mamba.norm.weight": "model-00003-of-00004.safetensors",
638
  "model.mamba_layers.63.mamba.out_proj.weight": "model-00003-of-00004.safetensors",
639
  "model.mamba_layers.64.input_layernorm.weight": "model-00003-of-00004.safetensors",
 
642
  "model.mamba_layers.64.mamba.conv1d.bias": "model-00003-of-00004.safetensors",
643
  "model.mamba_layers.64.mamba.conv1d.weight": "model-00003-of-00004.safetensors",
644
  "model.mamba_layers.64.mamba.dt_bias": "model-00003-of-00004.safetensors",
645
+ "model.mamba_layers.64.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
646
  "model.mamba_layers.64.mamba.norm.weight": "model-00003-of-00004.safetensors",
647
  "model.mamba_layers.64.mamba.out_proj.weight": "model-00003-of-00004.safetensors",
648
  "model.mamba_layers.65.input_layernorm.weight": "model-00003-of-00004.safetensors",
 
651
  "model.mamba_layers.65.mamba.conv1d.bias": "model-00003-of-00004.safetensors",
652
  "model.mamba_layers.65.mamba.conv1d.weight": "model-00003-of-00004.safetensors",
653
  "model.mamba_layers.65.mamba.dt_bias": "model-00003-of-00004.safetensors",
654
+ "model.mamba_layers.65.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
655
  "model.mamba_layers.65.mamba.norm.weight": "model-00003-of-00004.safetensors",
656
  "model.mamba_layers.65.mamba.out_proj.weight": "model-00003-of-00004.safetensors",
657
  "model.mamba_layers.66.input_layernorm.weight": "model-00003-of-00004.safetensors",
 
660
  "model.mamba_layers.66.mamba.conv1d.bias": "model-00003-of-00004.safetensors",
661
  "model.mamba_layers.66.mamba.conv1d.weight": "model-00003-of-00004.safetensors",
662
  "model.mamba_layers.66.mamba.dt_bias": "model-00003-of-00004.safetensors",
663
+ "model.mamba_layers.66.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
664
  "model.mamba_layers.66.mamba.norm.weight": "model-00003-of-00004.safetensors",
665
  "model.mamba_layers.66.mamba.out_proj.weight": "model-00003-of-00004.safetensors",
666
  "model.mamba_layers.67.input_layernorm.weight": "model-00003-of-00004.safetensors",
 
669
  "model.mamba_layers.67.mamba.conv1d.bias": "model-00003-of-00004.safetensors",
670
  "model.mamba_layers.67.mamba.conv1d.weight": "model-00003-of-00004.safetensors",
671
  "model.mamba_layers.67.mamba.dt_bias": "model-00003-of-00004.safetensors",
672
+ "model.mamba_layers.67.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
673
  "model.mamba_layers.67.mamba.norm.weight": "model-00003-of-00004.safetensors",
674
  "model.mamba_layers.67.mamba.out_proj.weight": "model-00003-of-00004.safetensors",
675
  "model.mamba_layers.68.input_layernorm.weight": "model-00003-of-00004.safetensors",
 
678
  "model.mamba_layers.68.mamba.conv1d.bias": "model-00003-of-00004.safetensors",
679
  "model.mamba_layers.68.mamba.conv1d.weight": "model-00003-of-00004.safetensors",
680
  "model.mamba_layers.68.mamba.dt_bias": "model-00003-of-00004.safetensors",
681
+ "model.mamba_layers.68.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
682
  "model.mamba_layers.68.mamba.norm.weight": "model-00003-of-00004.safetensors",
683
  "model.mamba_layers.68.mamba.out_proj.weight": "model-00003-of-00004.safetensors",
684
  "model.mamba_layers.69.input_layernorm.weight": "model-00003-of-00004.safetensors",
 
687
  "model.mamba_layers.69.mamba.conv1d.bias": "model-00003-of-00004.safetensors",
688
  "model.mamba_layers.69.mamba.conv1d.weight": "model-00003-of-00004.safetensors",
689
  "model.mamba_layers.69.mamba.dt_bias": "model-00003-of-00004.safetensors",
690
+ "model.mamba_layers.69.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
691
  "model.mamba_layers.69.mamba.norm.weight": "model-00003-of-00004.safetensors",
692
  "model.mamba_layers.69.mamba.out_proj.weight": "model-00003-of-00004.safetensors",
693
  "model.mamba_layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
 
696
  "model.mamba_layers.7.mamba.conv1d.bias": "model-00001-of-00004.safetensors",
697
  "model.mamba_layers.7.mamba.conv1d.weight": "model-00001-of-00004.safetensors",
698
  "model.mamba_layers.7.mamba.dt_bias": "model-00001-of-00004.safetensors",
699
+ "model.mamba_layers.7.mamba.in_proj.weight": "model-00001-of-00004.safetensors",
700
  "model.mamba_layers.7.mamba.norm.weight": "model-00001-of-00004.safetensors",
701
  "model.mamba_layers.7.mamba.out_proj.weight": "model-00001-of-00004.safetensors",
702
  "model.mamba_layers.70.input_layernorm.weight": "model-00003-of-00004.safetensors",
 
705
  "model.mamba_layers.70.mamba.conv1d.bias": "model-00003-of-00004.safetensors",
706
  "model.mamba_layers.70.mamba.conv1d.weight": "model-00003-of-00004.safetensors",
707
  "model.mamba_layers.70.mamba.dt_bias": "model-00003-of-00004.safetensors",
708
+ "model.mamba_layers.70.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
709
  "model.mamba_layers.70.mamba.norm.weight": "model-00003-of-00004.safetensors",
710
  "model.mamba_layers.70.mamba.out_proj.weight": "model-00003-of-00004.safetensors",
711
  "model.mamba_layers.71.input_layernorm.weight": "model-00003-of-00004.safetensors",
 
714
  "model.mamba_layers.71.mamba.conv1d.bias": "model-00003-of-00004.safetensors",
715
  "model.mamba_layers.71.mamba.conv1d.weight": "model-00003-of-00004.safetensors",
716
  "model.mamba_layers.71.mamba.dt_bias": "model-00003-of-00004.safetensors",
717
+ "model.mamba_layers.71.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
718
  "model.mamba_layers.71.mamba.norm.weight": "model-00003-of-00004.safetensors",
719
  "model.mamba_layers.71.mamba.out_proj.weight": "model-00003-of-00004.safetensors",
720
  "model.mamba_layers.72.input_layernorm.weight": "model-00003-of-00004.safetensors",
 
723
  "model.mamba_layers.72.mamba.conv1d.bias": "model-00003-of-00004.safetensors",
724
  "model.mamba_layers.72.mamba.conv1d.weight": "model-00003-of-00004.safetensors",
725
  "model.mamba_layers.72.mamba.dt_bias": "model-00003-of-00004.safetensors",
726
+ "model.mamba_layers.72.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
727
  "model.mamba_layers.72.mamba.norm.weight": "model-00003-of-00004.safetensors",
728
  "model.mamba_layers.72.mamba.out_proj.weight": "model-00003-of-00004.safetensors",
729
  "model.mamba_layers.73.input_layernorm.weight": "model-00003-of-00004.safetensors",
 
732
  "model.mamba_layers.73.mamba.conv1d.bias": "model-00003-of-00004.safetensors",
733
  "model.mamba_layers.73.mamba.conv1d.weight": "model-00003-of-00004.safetensors",
734
  "model.mamba_layers.73.mamba.dt_bias": "model-00003-of-00004.safetensors",
735
+ "model.mamba_layers.73.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
736
  "model.mamba_layers.73.mamba.norm.weight": "model-00003-of-00004.safetensors",
737
  "model.mamba_layers.73.mamba.out_proj.weight": "model-00003-of-00004.safetensors",
738
  "model.mamba_layers.74.input_layernorm.weight": "model-00003-of-00004.safetensors",
 
741
  "model.mamba_layers.74.mamba.conv1d.bias": "model-00003-of-00004.safetensors",
742
  "model.mamba_layers.74.mamba.conv1d.weight": "model-00003-of-00004.safetensors",
743
  "model.mamba_layers.74.mamba.dt_bias": "model-00003-of-00004.safetensors",
744
+ "model.mamba_layers.74.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
745
  "model.mamba_layers.74.mamba.norm.weight": "model-00003-of-00004.safetensors",
746
  "model.mamba_layers.74.mamba.out_proj.weight": "model-00003-of-00004.safetensors",
747
  "model.mamba_layers.75.input_layernorm.weight": "model-00003-of-00004.safetensors",
 
750
  "model.mamba_layers.75.mamba.conv1d.bias": "model-00003-of-00004.safetensors",
751
  "model.mamba_layers.75.mamba.conv1d.weight": "model-00003-of-00004.safetensors",
752
  "model.mamba_layers.75.mamba.dt_bias": "model-00003-of-00004.safetensors",
753
+ "model.mamba_layers.75.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
754
  "model.mamba_layers.75.mamba.norm.weight": "model-00003-of-00004.safetensors",
755
  "model.mamba_layers.75.mamba.out_proj.weight": "model-00003-of-00004.safetensors",
756
  "model.mamba_layers.76.input_layernorm.weight": "model-00003-of-00004.safetensors",
 
759
  "model.mamba_layers.76.mamba.conv1d.bias": "model-00003-of-00004.safetensors",
760
  "model.mamba_layers.76.mamba.conv1d.weight": "model-00003-of-00004.safetensors",
761
  "model.mamba_layers.76.mamba.dt_bias": "model-00003-of-00004.safetensors",
762
+ "model.mamba_layers.76.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
763
  "model.mamba_layers.76.mamba.norm.weight": "model-00003-of-00004.safetensors",
764
  "model.mamba_layers.76.mamba.out_proj.weight": "model-00003-of-00004.safetensors",
765
  "model.mamba_layers.77.input_layernorm.weight": "model-00003-of-00004.safetensors",
 
768
  "model.mamba_layers.77.mamba.conv1d.bias": "model-00003-of-00004.safetensors",
769
  "model.mamba_layers.77.mamba.conv1d.weight": "model-00003-of-00004.safetensors",
770
  "model.mamba_layers.77.mamba.dt_bias": "model-00003-of-00004.safetensors",
771
+ "model.mamba_layers.77.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
772
  "model.mamba_layers.77.mamba.norm.weight": "model-00003-of-00004.safetensors",
773
  "model.mamba_layers.77.mamba.out_proj.weight": "model-00003-of-00004.safetensors",
774
  "model.mamba_layers.78.input_layernorm.weight": "model-00003-of-00004.safetensors",
 
777
  "model.mamba_layers.78.mamba.conv1d.bias": "model-00003-of-00004.safetensors",
778
  "model.mamba_layers.78.mamba.conv1d.weight": "model-00003-of-00004.safetensors",
779
  "model.mamba_layers.78.mamba.dt_bias": "model-00003-of-00004.safetensors",
780
+ "model.mamba_layers.78.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
781
  "model.mamba_layers.78.mamba.norm.weight": "model-00003-of-00004.safetensors",
782
  "model.mamba_layers.78.mamba.out_proj.weight": "model-00003-of-00004.safetensors",
783
  "model.mamba_layers.79.input_layernorm.weight": "model-00003-of-00004.safetensors",
 
786
  "model.mamba_layers.79.mamba.conv1d.bias": "model-00003-of-00004.safetensors",
787
  "model.mamba_layers.79.mamba.conv1d.weight": "model-00003-of-00004.safetensors",
788
  "model.mamba_layers.79.mamba.dt_bias": "model-00003-of-00004.safetensors",
789
+ "model.mamba_layers.79.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
790
  "model.mamba_layers.79.mamba.norm.weight": "model-00003-of-00004.safetensors",
791
  "model.mamba_layers.79.mamba.out_proj.weight": "model-00003-of-00004.safetensors",
792
  "model.mamba_layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors",
 
795
  "model.mamba_layers.8.mamba.conv1d.bias": "model-00001-of-00004.safetensors",
796
  "model.mamba_layers.8.mamba.conv1d.weight": "model-00001-of-00004.safetensors",
797
  "model.mamba_layers.8.mamba.dt_bias": "model-00001-of-00004.safetensors",
798
+ "model.mamba_layers.8.mamba.in_proj.weight": "model-00001-of-00004.safetensors",
799
  "model.mamba_layers.8.mamba.norm.weight": "model-00001-of-00004.safetensors",
800
  "model.mamba_layers.8.mamba.out_proj.weight": "model-00001-of-00004.safetensors",
801
  "model.mamba_layers.80.input_layernorm.weight": "model-00003-of-00004.safetensors",
 
804
  "model.mamba_layers.80.mamba.conv1d.bias": "model-00003-of-00004.safetensors",
805
  "model.mamba_layers.80.mamba.conv1d.weight": "model-00003-of-00004.safetensors",
806
  "model.mamba_layers.80.mamba.dt_bias": "model-00003-of-00004.safetensors",
807
+ "model.mamba_layers.80.mamba.in_proj.weight": "model-00003-of-00004.safetensors",
808
  "model.mamba_layers.80.mamba.norm.weight": "model-00003-of-00004.safetensors",
809
  "model.mamba_layers.80.mamba.out_proj.weight": "model-00003-of-00004.safetensors",
810
  "model.mamba_layers.9.input_layernorm.weight": "model-00001-of-00004.safetensors",
 
813
  "model.mamba_layers.9.mamba.conv1d.bias": "model-00001-of-00004.safetensors",
814
  "model.mamba_layers.9.mamba.conv1d.weight": "model-00001-of-00004.safetensors",
815
  "model.mamba_layers.9.mamba.dt_bias": "model-00001-of-00004.safetensors",
816
+ "model.mamba_layers.9.mamba.in_proj.weight": "model-00001-of-00004.safetensors",
817
  "model.mamba_layers.9.mamba.norm.weight": "model-00001-of-00004.safetensors",
818
  "model.mamba_layers.9.mamba.out_proj.weight": "model-00001-of-00004.safetensors"
819
  }
820
+ }
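
The `+` lines above extend the `weight_map` so that each `model.mamba_layers.<N>.mamba.in_proj.weight` parameter resolves to one of the four safetensors shards. As a minimal sketch (not part of this commit), assuming the shards and `model.safetensors.index.json` have been downloaded to a local directory, the index can be used to locate and read a single one of these tensors; the directory path below is illustrative only:

```python
import json
from safetensors import safe_open

# Illustrative only: resolve a parameter name to its shard via the index,
# then read that single tensor from the shard file.
repo_dir = "./zamba2-checkpoint"  # hypothetical local download of this repo
param = "model.mamba_layers.1.mamba.in_proj.weight"  # key shown as added above

with open(f"{repo_dir}/model.safetensors.index.json") as f:
    index = json.load(f)

shard_file = index["weight_map"][param]  # e.g. "model-00001-of-00004.safetensors"

with safe_open(f"{repo_dir}/{shard_file}", framework="pt", device="cpu") as shard:
    tensor = shard.get_tensor(param)

print(param, "->", shard_file, tuple(tensor.shape))
```

`transformers` performs this shard lookup automatically when `from_pretrained` loads a sharded checkpoint; the sketch only makes the mapping explicit.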