eval

Browse files

Files changed (1) hide show

README.md +92 -0

README.md CHANGED Viewed

@@ -40,6 +40,98 @@ model-index:
  - type: acc
  value: 77.27
  name: acc
 license: llama3.1
 ---

  - type: acc
  value: 77.27
  name: acc
+ - task:
+ type: text-generation
+ name: Text Generation
+ dataset:
+ name: IFEval (0-Shot)
+ type: HuggingFaceH4/ifeval
+ args:
+ num_few_shot: 0
+ metrics:
+ - type: inst_level_strict_acc and prompt_level_strict_acc
+ value: 55.39
+ name: strict accuracy
+ source:
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-Enigma
+ name: Open LLM Leaderboard
+ - task:
+ type: text-generation
+ name: Text Generation
+ dataset:
+ name: BBH (3-Shot)
+ type: BBH
+ args:
+ num_few_shot: 3
+ metrics:
+ - type: acc_norm
+ value: 28.47
+ name: normalized accuracy
+ source:
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-Enigma
+ name: Open LLM Leaderboard
+ - task:
+ type: text-generation
+ name: Text Generation
+ dataset:
+ name: MATH Lvl 5 (4-Shot)
+ type: hendrycks/competition_math
+ args:
+ num_few_shot: 4
+ metrics:
+ - type: exact_match
+ value: 10.12
+ name: exact match
+ source:
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-Enigma
+ name: Open LLM Leaderboard
+ - task:
+ type: text-generation
+ name: Text Generation
+ dataset:
+ name: GPQA (0-shot)
+ type: Idavidrein/gpqa
+ args:
+ num_few_shot: 0
+ metrics:
+ - type: acc_norm
+ value: 1.57
+ name: acc_norm
+ source:
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-Enigma
+ name: Open LLM Leaderboard
+ - task:
+ type: text-generation
+ name: Text Generation
+ dataset:
+ name: MuSR (0-shot)
+ type: TAUR-Lab/MuSR
+ args:
+ num_few_shot: 0
+ metrics:
+ - type: acc_norm
+ value: 11.41
+ name: acc_norm
+ source:
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-Enigma
+ name: Open LLM Leaderboard
+ - task:
+ type: text-generation
+ name: Text Generation
+ dataset:
+ name: MMLU-PRO (5-shot)
+ type: TIGER-Lab/MMLU-Pro
+ config: main
+ split: test
+ args:
+ num_few_shot: 5
+ metrics:
+ - type: acc
+ value: 26.2
+ name: accuracy
+ source:
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-Enigma
+ name: Open LLM Leaderboard
 license: llama3.1
 ---