jtatman committed on
Commit
e722b7d
1 Parent(s): 2608579

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +49 -1
README.md CHANGED
@@ -83,4 +83,52 @@ Dataset was formatted in ShareGpt format for the purposes of using with Axolotl,
83
  - num_epochs: 3
84
  - optimizer: adamw_bnb_8bit
85
  - lr_scheduler: cosine
86
- - learning_rate: 0.00025
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  - num_epochs: 3
84
  - optimizer: adamw_bnb_8bit
85
  - lr_scheduler: cosine
86
+ - learning_rate: 0.00025
87
+
88
+ #### Evaluation
89
+
90
+ | Groups |Version| Filter |n-shot| Metric | Value | |Stderr|
91
+ |--------------------|-------|----------------|-----:|-----------|------:|---|-----:|
92
+ |Open LLM Leaderboard|N/A |none | 5|rouge2_acc | 0.1920|± |0.0176|
93
+ | | |none | 5|bleu_max |15.2292|± |0.6714|
94
+ | | |flexible-extract| 5|exact_match| 0.0220|± |0.0066|
95
+ | - truthfulqa_mc1 | 2|none | 0|acc | 0.2440|± |0.0192|
96
+ | - truthfulqa_mc2 | 2|none | 0|acc | 0.4430|± |0.0195|
97
+ | - winogrande | 1|none | 5|acc | 0.5120|± |0.0224|
98
+ | - arc_challenge | 1|none | 25|acc | 0.1760|± |0.0170|
99
+ | | |none | 25|acc_norm | 0.2320|± |0.0189|
100
+ | - gsm8k | 3|strict-match | 5|exact_match| 0.0060|± |0.0035|
101
+ | | |flexible-extract| 5|exact_match| 0.0220|± |0.0066|
102
+ | - hellaswag | 1|none | 10|acc | 0.3520|± |0.0214|
103
+ | | |none | 10|acc_norm | 0.4040|± |0.0220|
104
+ | | |none | 5|rouge2_diff|-3.3178|± |0.9477|
105
+ | | |none | 5|rougeL_acc | 0.3860|± |0.0218|
106
+ | | |none | 5|acc_norm | 0.3180|± |0.0145|
107
+ | | |none | 5|rouge1_diff|-1.5564|± |1.0223|
108
+ | | |none | 5|bleu_diff |-0.6500|± |0.6421|
109
+ | | |none | 5|rouge2_max |16.4873|± |1.0172|
110
+ | | |none | 5|rougeL_diff|-0.7765|± |1.0034|
111
+ | | |strict-match | 5|exact_match| 0.0060|± |0.0035|
112
+ | | |none | 5|bleu_acc | 0.4360|± |0.0222|
113
+ | | |none | 5|rougeL_max |33.8798|± |0.9367|
114
+ | | |none | 5|rouge1_max |36.3550|± |0.9462|
115
+ | | |none | 5|rouge1_acc | 0.3700|± |0.0216|
116
+ | | |none | 5|acc | 0.2664|± |0.0036|
117
+ | - mmlu |N/A |none | 0|acc | 0.2533|± |0.0039|
118
+ | - humanities |N/A |none | 5|acc | 0.2408|± |0.0075|
119
+ | - other |N/A |none | 5|acc | 0.2443|± |0.0080|
120
+ | - social_sciences |N/A |none | 5|acc | 0.2538|± |0.0081|
121
+ | - stem |N/A |none | 5|acc | 0.2740|± |0.0079|
122
+ | - truthfulqa |N/A |none | 0|rouge2_acc | 0.1920|± |0.0176|
123
+ | | |none | 0|rougeL_diff|-0.7765|± |1.0034|
124
+ | | |none | 0|bleu_max |15.2292|± |0.6714|
125
+ | | |none | 0|rouge2_diff|-3.3178|± |0.9477|
126
+ | | |none | 0|rougeL_acc | 0.3860|± |0.0218|
127
+ | | |none | 0|bleu_diff |-0.6500|± |0.6421|
128
+ | | |none | 0|rouge2_max |16.4873|± |1.0172|
129
+ | | |none | 0|rouge1_diff|-1.5564|± |1.0223|
130
+ | | |none | 0|acc | 0.3435|± |0.0137|
131
+ | | |none | 0|bleu_acc | 0.4360|± |0.0222|
132
+ | | |none | 0|rougeL_max |33.8798|± |0.9367|
133
+ | | |none | 0|rouge1_max |36.3550|± |0.9462|
134
+ | | |none | 0|rouge1_acc | 0.3700|± |0.0216|