lm_head:
  accum_format: SAME
  approximation_function: NONE
  input_format: SAME
  instance: Linear
  output_format: SAME
  weight_format: SAME
  weight_sparseness: DENSE
transformer.drop:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: SAME
transformer.h.0.attn.attn_dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: BFP[8|8]{64,-1}(SN)
transformer.h.0.attn.c_attn:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: BFP[8|8]{64,-1}(SN)
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE
transformer.h.0.attn.c_proj:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE
transformer.h.0.attn.resid_dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: SAME
transformer.h.0.attn.softmax:
  approximation_function: SOFTMAX(base2,float16)
  input_format: SAME
  instance: Softmax
  output_format: SAME
transformer.h.0.ln_1:
  approximation_function: LAYERNORM(fallback,4,float16)
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
  output_format: SAME
  weight_format: SAME
transformer.h.0.ln_2:
  approximation_function: LAYERNORM(fallback,4,float16)
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
  output_format: SAME
  weight_format: SAME
transformer.h.0.mlp.act:
  approximation_function: GELU(poly2,float16)
  input_format: SAME
  instance: GELU
  output_format: SAME
transformer.h.0.mlp.c_fc:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE
transformer.h.0.mlp.c_proj:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE
transformer.h.0.mlp.dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: SAME
transformer.h.1.attn.attn_dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: BFP[8|8]{64,-1}(SN)
transformer.h.1.attn.c_attn:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: BFP[8|8]{64,-1}(SN)
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE
transformer.h.1.attn.c_proj:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE
transformer.h.1.attn.resid_dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: SAME
transformer.h.1.attn.softmax:
  approximation_function: SOFTMAX(base2,float16)
  input_format: SAME
  instance: Softmax
  output_format: SAME
transformer.h.1.ln_1:
  approximation_function: LAYERNORM(fallback,4,float16)
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
  output_format: SAME
  weight_format: SAME
transformer.h.1.ln_2:
  approximation_function: LAYERNORM(fallback,4,float16)
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
  output_format: SAME
  weight_format: SAME
transformer.h.1.mlp.act:
  approximation_function: GELU(poly2,float16)
  input_format: SAME
  instance: GELU
  output_format: SAME
transformer.h.1.mlp.c_fc:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE
transformer.h.1.mlp.c_proj:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE
transformer.h.1.mlp.dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: SAME
transformer.h.2.attn.attn_dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: BFP[8|8]{64,-1}(SN)
transformer.h.2.attn.c_attn:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: BFP[8|8]{64,-1}(SN)
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE
transformer.h.2.attn.c_proj:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE
transformer.h.2.attn.resid_dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: SAME
transformer.h.2.attn.softmax:
  approximation_function: SOFTMAX(base2,float16)
  input_format: SAME
  instance: Softmax
  output_format: SAME
transformer.h.2.ln_1:
  approximation_function: LAYERNORM(fallback,4,float16)
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
  output_format: SAME
  weight_format: SAME
transformer.h.2.ln_2:
  approximation_function: LAYERNORM(fallback,4,float16)
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
  output_format: SAME
  weight_format: SAME
transformer.h.2.mlp.act:
  approximation_function: GELU(poly2,float16)
  input_format: SAME
  instance: GELU
  output_format: SAME
transformer.h.2.mlp.c_fc:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE
transformer.h.2.mlp.c_proj:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE
transformer.h.2.mlp.dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: SAME
transformer.h.3.attn.attn_dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: BFP[8|8]{64,-1}(SN)
transformer.h.3.attn.c_attn:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: BFP[8|8]{64,-1}(SN)
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE
transformer.h.3.attn.c_proj:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE
transformer.h.3.attn.resid_dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: SAME
transformer.h.3.attn.softmax:
  approximation_function: SOFTMAX(base2,float16)
  input_format: SAME
  instance: Softmax
  output_format: SAME
transformer.h.3.ln_1:
  approximation_function: LAYERNORM(fallback,4,float16)
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
  output_format: SAME
  weight_format: SAME
transformer.h.3.ln_2:
  approximation_function: LAYERNORM(fallback,4,float16)
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
  output_format: SAME
  weight_format: SAME
transformer.h.3.mlp.act:
  approximation_function: GELU(poly2,float16)
  input_format: SAME
  instance: GELU
  output_format: SAME
transformer.h.3.mlp.c_fc:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE
transformer.h.3.mlp.c_proj:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE
transformer.h.3.mlp.dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: SAME
transformer.h.4.attn.attn_dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: BFP[8|8]{64,-1}(SN)
transformer.h.4.attn.c_attn:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: BFP[8|8]{64,-1}(SN)
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE
transformer.h.4.attn.c_proj:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE
transformer.h.4.attn.resid_dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: SAME
transformer.h.4.attn.softmax:
  approximation_function: SOFTMAX(base2,float16)
  input_format: SAME
  instance: Softmax
  output_format: SAME
transformer.h.4.ln_1:
  approximation_function: LAYERNORM(fallback,4,float16)
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
  output_format: SAME
  weight_format: SAME
transformer.h.4.ln_2:
  approximation_function: LAYERNORM(fallback,4,float16)
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
  output_format: SAME
  weight_format: SAME
transformer.h.4.mlp.act:
  approximation_function: GELU(poly2,float16)
  input_format: SAME
  instance: GELU
  output_format: SAME
transformer.h.4.mlp.c_fc:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE
transformer.h.4.mlp.c_proj:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE
transformer.h.4.mlp.dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: SAME
transformer.h.5.attn.attn_dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: BFP[8|8]{64,-1}(SN)
transformer.h.5.attn.c_attn:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: BFP[8|8]{64,-1}(SN)
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE
transformer.h.5.attn.c_proj:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE
transformer.h.5.attn.resid_dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: SAME
transformer.h.5.attn.softmax:
  approximation_function: SOFTMAX(base2,float16)
  input_format: SAME
  instance: Softmax
  output_format: SAME
transformer.h.5.ln_1:
  approximation_function: LAYERNORM(fallback,4,float16)
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
  output_format: SAME
  weight_format: SAME
transformer.h.5.ln_2:
  approximation_function: LAYERNORM(fallback,4,float16)
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
  output_format: SAME
  weight_format: SAME
transformer.h.5.mlp.act:
  approximation_function: GELU(poly2,float16)
  input_format: SAME
  instance: GELU
  output_format: SAME
transformer.h.5.mlp.c_fc:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE
transformer.h.5.mlp.c_proj:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE
transformer.h.5.mlp.dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: SAME
transformer.ln_f:
  approximation_function: LAYERNORM(fallback,4,float16)
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
  output_format: SAME
  weight_format: SAME
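The recurring BFP[8|8]{64,-1}(SN) and BFP[8|8]{64,0}(SN) entries are block floating-point tensor formats. A plausible reading of the notation (an assumption here, since it is specific to this configuration's tooling) is an 8-bit signed mantissa paired with an 8-bit exponent shared by each block of 64 elements, blocked along the last axis (-1) for activations and along axis 0 for weights; the (SN) suffix is left uninterpreted. The NumPy sketch below illustrates that kind of shared-exponent block quantization; the function name and the exact rounding and clipping choices are illustrative assumptions, not the behavior of the underlying library.

import numpy as np

def bfp_quantize(x, mantissa_bits=8, block_size=64, axis=-1):
    # Illustrative shared-exponent block floating point ("fake quantization"):
    # each block of `block_size` values along `axis` shares one exponent and
    # each value keeps a signed mantissa of `mantissa_bits` bits.
    x = np.moveaxis(np.asarray(x, dtype=np.float64), axis, -1)
    shape = x.shape
    assert shape[-1] % block_size == 0, "axis length must be a multiple of block_size"
    blocks = x.reshape(-1, block_size)

    # Shared exponent: exponent of the largest magnitude in each block.
    max_mag = np.max(np.abs(blocks), axis=-1, keepdims=True)
    exp = np.floor(np.log2(np.where(max_mag == 0, 1.0, max_mag)))

    # Scale so the largest value fits in the signed mantissa range, then
    # round to nearest and clip (e.g. [-128, 127] for 8 mantissa bits).
    qmax = 2 ** (mantissa_bits - 1) - 1
    scale = 2.0 ** (exp - (mantissa_bits - 2))
    q = np.clip(np.rint(blocks / scale), -qmax - 1, qmax)

    # Dequantize back to float to emulate the precision loss.
    out = (q * scale).reshape(shape)
    return np.moveaxis(out, -1, axis)

# Example: an activation tensor blocked along its last axis, as in the
# input_format entries above; weights would use axis=0 per the {64,0} formats.
acts = np.random.randn(4, 128)
print(np.max(np.abs(acts - bfp_quantize(acts, axis=-1))))  # small quantization error

Under this reading, SAME means a tensor simply keeps the format of the incoming data, while the SOFTMAX(base2,float16), GELU(poly2,float16), and LAYERNORM(fallback,4,float16) entries appear to select approximation kernels (e.g. a base-2 softmax and a low-degree polynomial GELU evaluated in float16) rather than different numeric formats.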