# Text Generation
# English
# Eval Results
# gpt2 / BASELINE.yaml
# d-matrix-user's picture
# commit tokenizer
# 3434f81
# raw
# history blame
# 11.9 kB
# Per-module configuration for the gpt2 model (BASELINE.yaml).
#
# Layout: `model` maps each dotted module path (e.g. `transformer.h.0.attn.c_attn`)
# to that module's attribute mapping. `instance` names the module type
# (Linear, Dropout, HFTransformersConv1D, Softmax, LayerNorm, GELU).
#
# In this file every *_format field is SAME, every approximation_function is
# NONE and every weight_sparseness is DENSE.
# NOTE(review): presumably this denotes an unmodified baseline (no format
# change, no approximation, no sparsity) -- confirm against the consuming tool.
#
# The nesting is significant: each module's attributes must be indented beneath
# its own key. Written at a common indent, the repeated attribute names
# (approximation_function, input_format, ...) collide as duplicate keys of a
# single mapping and the module keys themselves become null.
model:
  # Output head.
  lm_head:
    accum_format: SAME
    approximation_function: NONE
    input_format: SAME
    instance: Linear
    output_format: SAME
    weight_format: SAME
    weight_sparseness: DENSE
  transformer.drop:
    approximation_function: NONE
    input_format: SAME
    instance: Dropout
    output_format: SAME

  # Modules under transformer.h.0
  transformer.h.0.attn.attn_dropout:
    approximation_function: NONE
    input_format: SAME
    instance: Dropout
    output_format: SAME
  transformer.h.0.attn.c_attn:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: HFTransformersConv1D
    output_format: SAME
    weight_format: SAME
    weight_sparseness: DENSE
  transformer.h.0.attn.c_proj:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: HFTransformersConv1D
    output_format: SAME
    weight_format: SAME
    weight_sparseness: DENSE
  transformer.h.0.attn.resid_dropout:
    approximation_function: NONE
    input_format: SAME
    instance: Dropout
    output_format: SAME
  transformer.h.0.attn.softmax:
    approximation_function: NONE
    input_format: SAME
    instance: Softmax
    output_format: SAME
  transformer.h.0.ln_1:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: LayerNorm
    output_format: SAME
    weight_format: SAME
  transformer.h.0.ln_2:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: LayerNorm
    output_format: SAME
    weight_format: SAME
  transformer.h.0.mlp.act:
    approximation_function: NONE
    input_format: SAME
    instance: GELU
    output_format: SAME
  transformer.h.0.mlp.c_fc:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: HFTransformersConv1D
    output_format: SAME
    weight_format: SAME
    weight_sparseness: DENSE
  transformer.h.0.mlp.c_proj:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: HFTransformersConv1D
    output_format: SAME
    weight_format: SAME
    weight_sparseness: DENSE
  transformer.h.0.mlp.dropout:
    approximation_function: NONE
    input_format: SAME
    instance: Dropout
    output_format: SAME

  # Modules under transformer.h.1
  transformer.h.1.attn.attn_dropout:
    approximation_function: NONE
    input_format: SAME
    instance: Dropout
    output_format: SAME
  transformer.h.1.attn.c_attn:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: HFTransformersConv1D
    output_format: SAME
    weight_format: SAME
    weight_sparseness: DENSE
  transformer.h.1.attn.c_proj:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: HFTransformersConv1D
    output_format: SAME
    weight_format: SAME
    weight_sparseness: DENSE
  transformer.h.1.attn.resid_dropout:
    approximation_function: NONE
    input_format: SAME
    instance: Dropout
    output_format: SAME
  transformer.h.1.attn.softmax:
    approximation_function: NONE
    input_format: SAME
    instance: Softmax
    output_format: SAME
  transformer.h.1.ln_1:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: LayerNorm
    output_format: SAME
    weight_format: SAME
  transformer.h.1.ln_2:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: LayerNorm
    output_format: SAME
    weight_format: SAME
  transformer.h.1.mlp.act:
    approximation_function: NONE
    input_format: SAME
    instance: GELU
    output_format: SAME
  transformer.h.1.mlp.c_fc:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: HFTransformersConv1D
    output_format: SAME
    weight_format: SAME
    weight_sparseness: DENSE
  transformer.h.1.mlp.c_proj:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: HFTransformersConv1D
    output_format: SAME
    weight_format: SAME
    weight_sparseness: DENSE
  transformer.h.1.mlp.dropout:
    approximation_function: NONE
    input_format: SAME
    instance: Dropout
    output_format: SAME

  # Modules under transformer.h.2
  transformer.h.2.attn.attn_dropout:
    approximation_function: NONE
    input_format: SAME
    instance: Dropout
    output_format: SAME
  transformer.h.2.attn.c_attn:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: HFTransformersConv1D
    output_format: SAME
    weight_format: SAME
    weight_sparseness: DENSE
  transformer.h.2.attn.c_proj:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: HFTransformersConv1D
    output_format: SAME
    weight_format: SAME
    weight_sparseness: DENSE
  transformer.h.2.attn.resid_dropout:
    approximation_function: NONE
    input_format: SAME
    instance: Dropout
    output_format: SAME
  transformer.h.2.attn.softmax:
    approximation_function: NONE
    input_format: SAME
    instance: Softmax
    output_format: SAME
  transformer.h.2.ln_1:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: LayerNorm
    output_format: SAME
    weight_format: SAME
  transformer.h.2.ln_2:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: LayerNorm
    output_format: SAME
    weight_format: SAME
  transformer.h.2.mlp.act:
    approximation_function: NONE
    input_format: SAME
    instance: GELU
    output_format: SAME
  transformer.h.2.mlp.c_fc:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: HFTransformersConv1D
    output_format: SAME
    weight_format: SAME
    weight_sparseness: DENSE
  transformer.h.2.mlp.c_proj:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: HFTransformersConv1D
    output_format: SAME
    weight_format: SAME
    weight_sparseness: DENSE
  transformer.h.2.mlp.dropout:
    approximation_function: NONE
    input_format: SAME
    instance: Dropout
    output_format: SAME

  # Modules under transformer.h.3
  transformer.h.3.attn.attn_dropout:
    approximation_function: NONE
    input_format: SAME
    instance: Dropout
    output_format: SAME
  transformer.h.3.attn.c_attn:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: HFTransformersConv1D
    output_format: SAME
    weight_format: SAME
    weight_sparseness: DENSE
  transformer.h.3.attn.c_proj:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: HFTransformersConv1D
    output_format: SAME
    weight_format: SAME
    weight_sparseness: DENSE
  transformer.h.3.attn.resid_dropout:
    approximation_function: NONE
    input_format: SAME
    instance: Dropout
    output_format: SAME
  transformer.h.3.attn.softmax:
    approximation_function: NONE
    input_format: SAME
    instance: Softmax
    output_format: SAME
  transformer.h.3.ln_1:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: LayerNorm
    output_format: SAME
    weight_format: SAME
  transformer.h.3.ln_2:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: LayerNorm
    output_format: SAME
    weight_format: SAME
  transformer.h.3.mlp.act:
    approximation_function: NONE
    input_format: SAME
    instance: GELU
    output_format: SAME
  transformer.h.3.mlp.c_fc:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: HFTransformersConv1D
    output_format: SAME
    weight_format: SAME
    weight_sparseness: DENSE
  transformer.h.3.mlp.c_proj:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: HFTransformersConv1D
    output_format: SAME
    weight_format: SAME
    weight_sparseness: DENSE
  transformer.h.3.mlp.dropout:
    approximation_function: NONE
    input_format: SAME
    instance: Dropout
    output_format: SAME

  # Modules under transformer.h.4
  transformer.h.4.attn.attn_dropout:
    approximation_function: NONE
    input_format: SAME
    instance: Dropout
    output_format: SAME
  transformer.h.4.attn.c_attn:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: HFTransformersConv1D
    output_format: SAME
    weight_format: SAME
    weight_sparseness: DENSE
  transformer.h.4.attn.c_proj:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: HFTransformersConv1D
    output_format: SAME
    weight_format: SAME
    weight_sparseness: DENSE
  transformer.h.4.attn.resid_dropout:
    approximation_function: NONE
    input_format: SAME
    instance: Dropout
    output_format: SAME
  transformer.h.4.attn.softmax:
    approximation_function: NONE
    input_format: SAME
    instance: Softmax
    output_format: SAME
  transformer.h.4.ln_1:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: LayerNorm
    output_format: SAME
    weight_format: SAME
  transformer.h.4.ln_2:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: LayerNorm
    output_format: SAME
    weight_format: SAME
  transformer.h.4.mlp.act:
    approximation_function: NONE
    input_format: SAME
    instance: GELU
    output_format: SAME
  transformer.h.4.mlp.c_fc:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: HFTransformersConv1D
    output_format: SAME
    weight_format: SAME
    weight_sparseness: DENSE
  transformer.h.4.mlp.c_proj:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: HFTransformersConv1D
    output_format: SAME
    weight_format: SAME
    weight_sparseness: DENSE
  transformer.h.4.mlp.dropout:
    approximation_function: NONE
    input_format: SAME
    instance: Dropout
    output_format: SAME

  # Modules under transformer.h.5
  transformer.h.5.attn.attn_dropout:
    approximation_function: NONE
    input_format: SAME
    instance: Dropout
    output_format: SAME
  transformer.h.5.attn.c_attn:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: HFTransformersConv1D
    output_format: SAME
    weight_format: SAME
    weight_sparseness: DENSE
  transformer.h.5.attn.c_proj:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: HFTransformersConv1D
    output_format: SAME
    weight_format: SAME
    weight_sparseness: DENSE
  transformer.h.5.attn.resid_dropout:
    approximation_function: NONE
    input_format: SAME
    instance: Dropout
    output_format: SAME
  transformer.h.5.attn.softmax:
    approximation_function: NONE
    input_format: SAME
    instance: Softmax
    output_format: SAME
  transformer.h.5.ln_1:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: LayerNorm
    output_format: SAME
    weight_format: SAME
  transformer.h.5.ln_2:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: LayerNorm
    output_format: SAME
    weight_format: SAME
  transformer.h.5.mlp.act:
    approximation_function: NONE
    input_format: SAME
    instance: GELU
    output_format: SAME
  transformer.h.5.mlp.c_fc:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: HFTransformersConv1D
    output_format: SAME
    weight_format: SAME
    weight_sparseness: DENSE
  transformer.h.5.mlp.c_proj:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: HFTransformersConv1D
    output_format: SAME
    weight_format: SAME
    weight_sparseness: DENSE
  transformer.h.5.mlp.dropout:
    approximation_function: NONE
    input_format: SAME
    instance: Dropout
    output_format: SAME

  # LayerNorm entry keyed transformer.ln_f.
  transformer.ln_f:
    approximation_function: NONE
    bias_format: SAME
    input_format: SAME
    instance: LayerNorm
    output_format: SAME
    weight_format: SAME