neuralmagic
/

Phi-3-mini-128k-instruct-FP8

@@ -10,46 +10,6 @@
  "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM"
  },
  "bos_token_id": 1,
- "compression_config": {
- "config_groups": {
- "group_0": {
- "input_activations": {
- "block_structure": null,
- "dynamic": false,
- "group_size": null,
- "num_bits": 8,
- "observer": "minmax",
- "observer_kwargs": {},
- "strategy": "tensor",
- "symmetric": true,
- "type": "float"
- },
- "output_activations": null,
- "targets": [
- "Linear"
- ],
- "weights": {
- "block_structure": null,
- "dynamic": false,
- "group_size": null,
- "num_bits": 8,
- "observer": "minmax",
- "observer_kwargs": {},
- "strategy": "tensor",
- "symmetric": true,
- "type": "float"
- }
- }
- },
- "format": "float-quantized",
- "global_compression_ratio": 1.246153835878366,
- "ignore": [
- "lm_head"
- ],
- "kv_cache_scheme": null,
- "quant_method": "compressed-tensors",
- "quantization_status": "frozen"
- },
  "embd_pdrop": 0.0,
  "eos_token_id": 32000,
  "hidden_act": "silu",
@@ -174,5 +134,45 @@
  "torch_dtype": "float16",
  "transformers_version": "4.44.0",
  "use_cache": true,
- "vocab_size": 32064
 }

  "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM"
  },
  "bos_token_id": 1,
  "embd_pdrop": 0.0,
  "eos_token_id": 32000,
  "hidden_act": "silu",
  "torch_dtype": "float16",
  "transformers_version": "4.44.0",
  "use_cache": true,
+ "vocab_size": 32064,
+ "quantization_config": {
+ "config_groups": {
+ "group_0": {
+ "input_activations": {
+ "block_structure": null,
+ "dynamic": false,
+ "group_size": null,
+ "num_bits": 8,
+ "observer": "minmax",
+ "observer_kwargs": {},
+ "strategy": "tensor",
+ "symmetric": true,
+ "type": "float"
+ },
+ "output_activations": null,
+ "targets": [
+ "Linear"
+ ],
+ "weights": {
+ "block_structure": null,
+ "dynamic": false,
+ "group_size": null,
+ "num_bits": 8,
+ "observer": "minmax",
+ "observer_kwargs": {},
+ "strategy": "tensor",
+ "symmetric": true,
+ "type": "float"
+ }
+ }
+ },
+ "format": "float-quantized",
+ "global_compression_ratio": 1.246153835878366,
+ "ignore": [
+ "lm_head"
+ ],
+ "kv_cache_scheme": null,
+ "quant_method": "compressed-tensors",
+ "quantization_status": "frozen"
+ }
 }