leaderboard-pr-bot committed on
Commit
30c42de
1 Parent(s): 339130b

Adding Evaluation Results

Browse files

This is an automated PR created with https://huggingface.co/spaces/Weyaxi/open-llm-leaderboard-results-pr

The purpose of this PR is to add evaluation results from the Open LLM Leaderboard to your model card.

If you encounter any issues, please report them to https://huggingface.co/spaces/Weyaxi/open-llm-leaderboard-results-pr/discussions

Files changed (1) hide show
  1. README.md +112 -13
README.md CHANGED
@@ -20,8 +20,7 @@ model-index:
20
  value: 73.81
21
  name: normalized accuracy
22
  source:
23
- url: >-
24
- https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=zhengr/MixTAO-7Bx2-MoE-v8.1
25
  name: Open LLM Leaderboard
26
  - task:
27
  type: text-generation
@@ -37,8 +36,7 @@ model-index:
37
  value: 89.22
38
  name: normalized accuracy
39
  source:
40
- url: >-
41
- https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=zhengr/MixTAO-7Bx2-MoE-v8.1
42
  name: Open LLM Leaderboard
43
  - task:
44
  type: text-generation
@@ -55,8 +53,7 @@ model-index:
55
  value: 64.92
56
  name: accuracy
57
  source:
58
- url: >-
59
- https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=zhengr/MixTAO-7Bx2-MoE-v8.1
60
  name: Open LLM Leaderboard
61
  - task:
62
  type: text-generation
@@ -72,8 +69,7 @@ model-index:
72
  - type: mc2
73
  value: 78.57
74
  source:
75
- url: >-
76
- https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=zhengr/MixTAO-7Bx2-MoE-v8.1
77
  name: Open LLM Leaderboard
78
  - task:
79
  type: text-generation
@@ -90,8 +86,7 @@ model-index:
90
  value: 87.37
91
  name: accuracy
92
  source:
93
- url: >-
94
- https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=zhengr/MixTAO-7Bx2-MoE-v8.1
95
  name: Open LLM Leaderboard
96
  - task:
97
  type: text-generation
@@ -108,8 +103,99 @@ model-index:
108
  value: 71.11
109
  name: accuracy
110
  source:
111
- url: >-
112
- https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=zhengr/MixTAO-7Bx2-MoE-v8.1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  name: Open LLM Leaderboard
114
  ---
115
 
@@ -143,4 +229,17 @@ Detailed results can be found [here](https://huggingface.co/datasets/open-llm-le
143
  |MMLU (5-Shot) |64.92|
144
  |TruthfulQA (0-shot) |78.57|
145
  |Winogrande (5-shot) |87.37|
146
- |GSM8k (5-shot) |71.11|
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  value: 73.81
21
  name: normalized accuracy
22
  source:
23
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=zhengr/MixTAO-7Bx2-MoE-v8.1
 
24
  name: Open LLM Leaderboard
25
  - task:
26
  type: text-generation
 
36
  value: 89.22
37
  name: normalized accuracy
38
  source:
39
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=zhengr/MixTAO-7Bx2-MoE-v8.1
 
40
  name: Open LLM Leaderboard
41
  - task:
42
  type: text-generation
 
53
  value: 64.92
54
  name: accuracy
55
  source:
56
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=zhengr/MixTAO-7Bx2-MoE-v8.1
 
57
  name: Open LLM Leaderboard
58
  - task:
59
  type: text-generation
 
69
  - type: mc2
70
  value: 78.57
71
  source:
72
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=zhengr/MixTAO-7Bx2-MoE-v8.1
 
73
  name: Open LLM Leaderboard
74
  - task:
75
  type: text-generation
 
86
  value: 87.37
87
  name: accuracy
88
  source:
89
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=zhengr/MixTAO-7Bx2-MoE-v8.1
 
90
  name: Open LLM Leaderboard
91
  - task:
92
  type: text-generation
 
103
  value: 71.11
104
  name: accuracy
105
  source:
106
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=zhengr/MixTAO-7Bx2-MoE-v8.1
107
+ name: Open LLM Leaderboard
108
+ - task:
109
+ type: text-generation
110
+ name: Text Generation
111
+ dataset:
112
+ name: IFEval (0-Shot)
113
+ type: HuggingFaceH4/ifeval
114
+ args:
115
+ num_few_shot: 0
116
+ metrics:
117
+ - type: inst_level_strict_acc and prompt_level_strict_acc
118
+ value: 41.62
119
+ name: strict accuracy
120
+ source:
121
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=mixtao/MixTAO-7Bx2-MoE-v8.1
122
+ name: Open LLM Leaderboard
123
+ - task:
124
+ type: text-generation
125
+ name: Text Generation
126
+ dataset:
127
+ name: BBH (3-Shot)
128
+ type: BBH
129
+ args:
130
+ num_few_shot: 3
131
+ metrics:
132
+ - type: acc_norm
133
+ value: 32.31
134
+ name: normalized accuracy
135
+ source:
136
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=mixtao/MixTAO-7Bx2-MoE-v8.1
137
+ name: Open LLM Leaderboard
138
+ - task:
139
+ type: text-generation
140
+ name: Text Generation
141
+ dataset:
142
+ name: MATH Lvl 5 (4-Shot)
143
+ type: hendrycks/competition_math
144
+ args:
145
+ num_few_shot: 4
146
+ metrics:
147
+ - type: exact_match
148
+ value: 8.01
149
+ name: exact match
150
+ source:
151
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=mixtao/MixTAO-7Bx2-MoE-v8.1
152
+ name: Open LLM Leaderboard
153
+ - task:
154
+ type: text-generation
155
+ name: Text Generation
156
+ dataset:
157
+ name: GPQA (0-shot)
158
+ type: Idavidrein/gpqa
159
+ args:
160
+ num_few_shot: 0
161
+ metrics:
162
+ - type: acc_norm
163
+ value: 4.59
164
+ name: acc_norm
165
+ source:
166
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=mixtao/MixTAO-7Bx2-MoE-v8.1
167
+ name: Open LLM Leaderboard
168
+ - task:
169
+ type: text-generation
170
+ name: Text Generation
171
+ dataset:
172
+ name: MuSR (0-shot)
173
+ type: TAUR-Lab/MuSR
174
+ args:
175
+ num_few_shot: 0
176
+ metrics:
177
+ - type: acc_norm
178
+ value: 15.29
179
+ name: acc_norm
180
+ source:
181
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=mixtao/MixTAO-7Bx2-MoE-v8.1
182
+ name: Open LLM Leaderboard
183
+ - task:
184
+ type: text-generation
185
+ name: Text Generation
186
+ dataset:
187
+ name: MMLU-PRO (5-shot)
188
+ type: TIGER-Lab/MMLU-Pro
189
+ config: main
190
+ split: test
191
+ args:
192
+ num_few_shot: 5
193
+ metrics:
194
+ - type: acc
195
+ value: 23.59
196
+ name: accuracy
197
+ source:
198
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=mixtao/MixTAO-7Bx2-MoE-v8.1
199
  name: Open LLM Leaderboard
200
  ---
201
 
 
229
  |MMLU (5-Shot) |64.92|
230
  |TruthfulQA (0-shot) |78.57|
231
  |Winogrande (5-shot) |87.37|
232
+ |GSM8k (5-shot) |71.11|
233
+ # [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard)
234
+ Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_mixtao__MixTAO-7Bx2-MoE-v8.1)
235
+
236
+ | Metric |Value|
237
+ |-------------------|----:|
238
+ |Avg. |20.90|
239
+ |IFEval (0-Shot) |41.62|
240
+ |BBH (3-Shot) |32.31|
241
+ |MATH Lvl 5 (4-Shot)| 8.01|
242
+ |GPQA (0-shot) | 4.59|
243
+ |MuSR (0-shot) |15.29|
244
+ |MMLU-PRO (5-shot) |23.59|
245
+