wanzin committed on
Commit
271eb00
1 Parent(s): 95db0b0

updating the configs of gpt2-xl

Browse files
Files changed (2) hide show
  1. config.json +4 -3
  2. configs/BASIC.yaml +193 -193
config.json CHANGED
@@ -15,10 +15,11 @@
15
  "layer_norm_epsilon": 1e-05,
16
  "model_type": "gpt2",
17
  "n_ctx": 1024,
18
- "n_embd": 768,
19
- "n_head": 12,
20
- "n_layer": 12,
21
  "n_positions": 1024,
 
22
  "resid_pdrop": 0.1,
23
  "summary_activation": null,
24
  "summary_first_dropout": 0.1,
 
15
  "layer_norm_epsilon": 1e-05,
16
  "model_type": "gpt2",
17
  "n_ctx": 1024,
18
+ "n_embd": 1600,
19
+ "n_head": 25,
20
+ "n_layer": 48,
21
  "n_positions": 1024,
22
+ "output_past": true,
23
  "resid_pdrop": 0.1,
24
  "summary_activation": null,
25
  "summary_first_dropout": 0.1,
configs/BASIC.yaml CHANGED
@@ -39,26 +39,26 @@ model:
39
  instance: Dropout
40
  output_format: SAME
41
  transformer.h.0.attn.softmax:
42
- approximation_function: SOFTMAX(base2,float16)
43
  input_format: SAME
44
  instance: Softmax
45
  output_format: SAME
46
  transformer.h.0.ln_1:
47
- approximation_function: LAYERNORM(fallback,4,float16)
48
  bias_format: SAME
49
  input_format: SAME
50
  instance: LayerNorm
51
  output_format: SAME
52
  weight_format: SAME
53
  transformer.h.0.ln_2:
54
- approximation_function: LAYERNORM(fallback,4,float16)
55
  bias_format: SAME
56
  input_format: SAME
57
  instance: LayerNorm
58
  output_format: SAME
59
  weight_format: SAME
60
  transformer.h.0.mlp.act:
61
- approximation_function: GELU(poly2,float16)
62
  input_format: SAME
63
  instance: GELU
64
  output_format: SAME
@@ -110,26 +110,26 @@ model:
110
  instance: Dropout
111
  output_format: SAME
112
  transformer.h.1.attn.softmax:
113
- approximation_function: SOFTMAX(base2,float16)
114
  input_format: SAME
115
  instance: Softmax
116
  output_format: SAME
117
  transformer.h.1.ln_1:
118
- approximation_function: LAYERNORM(fallback,4,float16)
119
  bias_format: SAME
120
  input_format: SAME
121
  instance: LayerNorm
122
  output_format: SAME
123
  weight_format: SAME
124
  transformer.h.1.ln_2:
125
- approximation_function: LAYERNORM(fallback,4,float16)
126
  bias_format: SAME
127
  input_format: SAME
128
  instance: LayerNorm
129
  output_format: SAME
130
  weight_format: SAME
131
  transformer.h.1.mlp.act:
132
- approximation_function: GELU(poly2,float16)
133
  input_format: SAME
134
  instance: GELU
135
  output_format: SAME
@@ -181,26 +181,26 @@ model:
181
  instance: Dropout
182
  output_format: SAME
183
  transformer.h.10.attn.softmax:
184
- approximation_function: SOFTMAX(base2,float16)
185
  input_format: SAME
186
  instance: Softmax
187
  output_format: SAME
188
  transformer.h.10.ln_1:
189
- approximation_function: LAYERNORM(fallback,4,float16)
190
  bias_format: SAME
191
  input_format: SAME
192
  instance: LayerNorm
193
  output_format: SAME
194
  weight_format: SAME
195
  transformer.h.10.ln_2:
196
- approximation_function: LAYERNORM(fallback,4,float16)
197
  bias_format: SAME
198
  input_format: SAME
199
  instance: LayerNorm
200
  output_format: SAME
201
  weight_format: SAME
202
  transformer.h.10.mlp.act:
203
- approximation_function: GELU(poly2,float16)
204
  input_format: SAME
205
  instance: GELU
206
  output_format: SAME
@@ -252,26 +252,26 @@ model:
252
  instance: Dropout
253
  output_format: SAME
254
  transformer.h.11.attn.softmax:
255
- approximation_function: SOFTMAX(base2,float16)
256
  input_format: SAME
257
  instance: Softmax
258
  output_format: SAME
259
  transformer.h.11.ln_1:
260
- approximation_function: LAYERNORM(fallback,4,float16)
261
  bias_format: SAME
262
  input_format: SAME
263
  instance: LayerNorm
264
  output_format: SAME
265
  weight_format: SAME
266
  transformer.h.11.ln_2:
267
- approximation_function: LAYERNORM(fallback,4,float16)
268
  bias_format: SAME
269
  input_format: SAME
270
  instance: LayerNorm
271
  output_format: SAME
272
  weight_format: SAME
273
  transformer.h.11.mlp.act:
274
- approximation_function: GELU(poly2,float16)
275
  input_format: SAME
276
  instance: GELU
277
  output_format: SAME
@@ -323,26 +323,26 @@ model:
323
  instance: Dropout
324
  output_format: SAME
325
  transformer.h.12.attn.softmax:
326
- approximation_function: SOFTMAX(base2,float16)
327
  input_format: SAME
328
  instance: Softmax
329
  output_format: SAME
330
  transformer.h.12.ln_1:
331
- approximation_function: LAYERNORM(fallback,4,float16)
332
  bias_format: SAME
333
  input_format: SAME
334
  instance: LayerNorm
335
  output_format: SAME
336
  weight_format: SAME
337
  transformer.h.12.ln_2:
338
- approximation_function: LAYERNORM(fallback,4,float16)
339
  bias_format: SAME
340
  input_format: SAME
341
  instance: LayerNorm
342
  output_format: SAME
343
  weight_format: SAME
344
  transformer.h.12.mlp.act:
345
- approximation_function: GELU(poly2,float16)
346
  input_format: SAME
347
  instance: GELU
348
  output_format: SAME
@@ -394,26 +394,26 @@ model:
394
  instance: Dropout
395
  output_format: SAME
396
  transformer.h.13.attn.softmax:
397
- approximation_function: SOFTMAX(base2,float16)
398
  input_format: SAME
399
  instance: Softmax
400
  output_format: SAME
401
  transformer.h.13.ln_1:
402
- approximation_function: LAYERNORM(fallback,4,float16)
403
  bias_format: SAME
404
  input_format: SAME
405
  instance: LayerNorm
406
  output_format: SAME
407
  weight_format: SAME
408
  transformer.h.13.ln_2:
409
- approximation_function: LAYERNORM(fallback,4,float16)
410
  bias_format: SAME
411
  input_format: SAME
412
  instance: LayerNorm
413
  output_format: SAME
414
  weight_format: SAME
415
  transformer.h.13.mlp.act:
416
- approximation_function: GELU(poly2,float16)
417
  input_format: SAME
418
  instance: GELU
419
  output_format: SAME
@@ -465,26 +465,26 @@ model:
465
  instance: Dropout
466
  output_format: SAME
467
  transformer.h.14.attn.softmax:
468
- approximation_function: SOFTMAX(base2,float16)
469
  input_format: SAME
470
  instance: Softmax
471
  output_format: SAME
472
  transformer.h.14.ln_1:
473
- approximation_function: LAYERNORM(fallback,4,float16)
474
  bias_format: SAME
475
  input_format: SAME
476
  instance: LayerNorm
477
  output_format: SAME
478
  weight_format: SAME
479
  transformer.h.14.ln_2:
480
- approximation_function: LAYERNORM(fallback,4,float16)
481
  bias_format: SAME
482
  input_format: SAME
483
  instance: LayerNorm
484
  output_format: SAME
485
  weight_format: SAME
486
  transformer.h.14.mlp.act:
487
- approximation_function: GELU(poly2,float16)
488
  input_format: SAME
489
  instance: GELU
490
  output_format: SAME
@@ -536,26 +536,26 @@ model:
536
  instance: Dropout
537
  output_format: SAME
538
  transformer.h.15.attn.softmax:
539
- approximation_function: SOFTMAX(base2,float16)
540
  input_format: SAME
541
  instance: Softmax
542
  output_format: SAME
543
  transformer.h.15.ln_1:
544
- approximation_function: LAYERNORM(fallback,4,float16)
545
  bias_format: SAME
546
  input_format: SAME
547
  instance: LayerNorm
548
  output_format: SAME
549
  weight_format: SAME
550
  transformer.h.15.ln_2:
551
- approximation_function: LAYERNORM(fallback,4,float16)
552
  bias_format: SAME
553
  input_format: SAME
554
  instance: LayerNorm
555
  output_format: SAME
556
  weight_format: SAME
557
  transformer.h.15.mlp.act:
558
- approximation_function: GELU(poly2,float16)
559
  input_format: SAME
560
  instance: GELU
561
  output_format: SAME
@@ -607,26 +607,26 @@ model:
607
  instance: Dropout
608
  output_format: SAME
609
  transformer.h.16.attn.softmax:
610
- approximation_function: SOFTMAX(base2,float16)
611
  input_format: SAME
612
  instance: Softmax
613
  output_format: SAME
614
  transformer.h.16.ln_1:
615
- approximation_function: LAYERNORM(fallback,4,float16)
616
  bias_format: SAME
617
  input_format: SAME
618
  instance: LayerNorm
619
  output_format: SAME
620
  weight_format: SAME
621
  transformer.h.16.ln_2:
622
- approximation_function: LAYERNORM(fallback,4,float16)
623
  bias_format: SAME
624
  input_format: SAME
625
  instance: LayerNorm
626
  output_format: SAME
627
  weight_format: SAME
628
  transformer.h.16.mlp.act:
629
- approximation_function: GELU(poly2,float16)
630
  input_format: SAME
631
  instance: GELU
632
  output_format: SAME
@@ -678,26 +678,26 @@ model:
678
  instance: Dropout
679
  output_format: SAME
680
  transformer.h.17.attn.softmax:
681
- approximation_function: SOFTMAX(base2,float16)
682
  input_format: SAME
683
  instance: Softmax
684
  output_format: SAME
685
  transformer.h.17.ln_1:
686
- approximation_function: LAYERNORM(fallback,4,float16)
687
  bias_format: SAME
688
  input_format: SAME
689
  instance: LayerNorm
690
  output_format: SAME
691
  weight_format: SAME
692
  transformer.h.17.ln_2:
693
- approximation_function: LAYERNORM(fallback,4,float16)
694
  bias_format: SAME
695
  input_format: SAME
696
  instance: LayerNorm
697
  output_format: SAME
698
  weight_format: SAME
699
  transformer.h.17.mlp.act:
700
- approximation_function: GELU(poly2,float16)
701
  input_format: SAME
702
  instance: GELU
703
  output_format: SAME
@@ -749,26 +749,26 @@ model:
749
  instance: Dropout
750
  output_format: SAME
751
  transformer.h.18.attn.softmax:
752
- approximation_function: SOFTMAX(base2,float16)
753
  input_format: SAME
754
  instance: Softmax
755
  output_format: SAME
756
  transformer.h.18.ln_1:
757
- approximation_function: LAYERNORM(fallback,4,float16)
758
  bias_format: SAME
759
  input_format: SAME
760
  instance: LayerNorm
761
  output_format: SAME
762
  weight_format: SAME
763
  transformer.h.18.ln_2:
764
- approximation_function: LAYERNORM(fallback,4,float16)
765
  bias_format: SAME
766
  input_format: SAME
767
  instance: LayerNorm
768
  output_format: SAME
769
  weight_format: SAME
770
  transformer.h.18.mlp.act:
771
- approximation_function: GELU(poly2,float16)
772
  input_format: SAME
773
  instance: GELU
774
  output_format: SAME
@@ -820,26 +820,26 @@ model:
820
  instance: Dropout
821
  output_format: SAME
822
  transformer.h.19.attn.softmax:
823
- approximation_function: SOFTMAX(base2,float16)
824
  input_format: SAME
825
  instance: Softmax
826
  output_format: SAME
827
  transformer.h.19.ln_1:
828
- approximation_function: LAYERNORM(fallback,4,float16)
829
  bias_format: SAME
830
  input_format: SAME
831
  instance: LayerNorm
832
  output_format: SAME
833
  weight_format: SAME
834
  transformer.h.19.ln_2:
835
- approximation_function: LAYERNORM(fallback,4,float16)
836
  bias_format: SAME
837
  input_format: SAME
838
  instance: LayerNorm
839
  output_format: SAME
840
  weight_format: SAME
841
  transformer.h.19.mlp.act:
842
- approximation_function: GELU(poly2,float16)
843
  input_format: SAME
844
  instance: GELU
845
  output_format: SAME
@@ -891,26 +891,26 @@ model:
891
  instance: Dropout
892
  output_format: SAME
893
  transformer.h.2.attn.softmax:
894
- approximation_function: SOFTMAX(base2,float16)
895
  input_format: SAME
896
  instance: Softmax
897
  output_format: SAME
898
  transformer.h.2.ln_1:
899
- approximation_function: LAYERNORM(fallback,4,float16)
900
  bias_format: SAME
901
  input_format: SAME
902
  instance: LayerNorm
903
  output_format: SAME
904
  weight_format: SAME
905
  transformer.h.2.ln_2:
906
- approximation_function: LAYERNORM(fallback,4,float16)
907
  bias_format: SAME
908
  input_format: SAME
909
  instance: LayerNorm
910
  output_format: SAME
911
  weight_format: SAME
912
  transformer.h.2.mlp.act:
913
- approximation_function: GELU(poly2,float16)
914
  input_format: SAME
915
  instance: GELU
916
  output_format: SAME
@@ -962,26 +962,26 @@ model:
962
  instance: Dropout
963
  output_format: SAME
964
  transformer.h.20.attn.softmax:
965
- approximation_function: SOFTMAX(base2,float16)
966
  input_format: SAME
967
  instance: Softmax
968
  output_format: SAME
969
  transformer.h.20.ln_1:
970
- approximation_function: LAYERNORM(fallback,4,float16)
971
  bias_format: SAME
972
  input_format: SAME
973
  instance: LayerNorm
974
  output_format: SAME
975
  weight_format: SAME
976
  transformer.h.20.ln_2:
977
- approximation_function: LAYERNORM(fallback,4,float16)
978
  bias_format: SAME
979
  input_format: SAME
980
  instance: LayerNorm
981
  output_format: SAME
982
  weight_format: SAME
983
  transformer.h.20.mlp.act:
984
- approximation_function: GELU(poly2,float16)
985
  input_format: SAME
986
  instance: GELU
987
  output_format: SAME
@@ -1033,26 +1033,26 @@ model:
1033
  instance: Dropout
1034
  output_format: SAME
1035
  transformer.h.21.attn.softmax:
1036
- approximation_function: SOFTMAX(base2,float16)
1037
  input_format: SAME
1038
  instance: Softmax
1039
  output_format: SAME
1040
  transformer.h.21.ln_1:
1041
- approximation_function: LAYERNORM(fallback,4,float16)
1042
  bias_format: SAME
1043
  input_format: SAME
1044
  instance: LayerNorm
1045
  output_format: SAME
1046
  weight_format: SAME
1047
  transformer.h.21.ln_2:
1048
- approximation_function: LAYERNORM(fallback,4,float16)
1049
  bias_format: SAME
1050
  input_format: SAME
1051
  instance: LayerNorm
1052
  output_format: SAME
1053
  weight_format: SAME
1054
  transformer.h.21.mlp.act:
1055
- approximation_function: GELU(poly2,float16)
1056
  input_format: SAME
1057
  instance: GELU
1058
  output_format: SAME
@@ -1104,26 +1104,26 @@ model:
1104
  instance: Dropout
1105
  output_format: SAME
1106
  transformer.h.22.attn.softmax:
1107
- approximation_function: SOFTMAX(base2,float16)
1108
  input_format: SAME
1109
  instance: Softmax
1110
  output_format: SAME
1111
  transformer.h.22.ln_1:
1112
- approximation_function: LAYERNORM(fallback,4,float16)
1113
  bias_format: SAME
1114
  input_format: SAME
1115
  instance: LayerNorm
1116
  output_format: SAME
1117
  weight_format: SAME
1118
  transformer.h.22.ln_2:
1119
- approximation_function: LAYERNORM(fallback,4,float16)
1120
  bias_format: SAME
1121
  input_format: SAME
1122
  instance: LayerNorm
1123
  output_format: SAME
1124
  weight_format: SAME
1125
  transformer.h.22.mlp.act:
1126
- approximation_function: GELU(poly2,float16)
1127
  input_format: SAME
1128
  instance: GELU
1129
  output_format: SAME
@@ -1175,26 +1175,26 @@ model:
1175
  instance: Dropout
1176
  output_format: SAME
1177
  transformer.h.23.attn.softmax:
1178
- approximation_function: SOFTMAX(base2,float16)
1179
  input_format: SAME
1180
  instance: Softmax
1181
  output_format: SAME
1182
  transformer.h.23.ln_1:
1183
- approximation_function: LAYERNORM(fallback,4,float16)
1184
  bias_format: SAME
1185
  input_format: SAME
1186
  instance: LayerNorm
1187
  output_format: SAME
1188
  weight_format: SAME
1189
  transformer.h.23.ln_2:
1190
- approximation_function: LAYERNORM(fallback,4,float16)
1191
  bias_format: SAME
1192
  input_format: SAME
1193
  instance: LayerNorm
1194
  output_format: SAME
1195
  weight_format: SAME
1196
  transformer.h.23.mlp.act:
1197
- approximation_function: GELU(poly2,float16)
1198
  input_format: SAME
1199
  instance: GELU
1200
  output_format: SAME
@@ -1246,26 +1246,26 @@ model:
1246
  instance: Dropout
1247
  output_format: SAME
1248
  transformer.h.24.attn.softmax:
1249
- approximation_function: SOFTMAX(base2,float16)
1250
  input_format: SAME
1251
  instance: Softmax
1252
  output_format: SAME
1253
  transformer.h.24.ln_1:
1254
- approximation_function: LAYERNORM(fallback,4,float16)
1255
  bias_format: SAME
1256
  input_format: SAME
1257
  instance: LayerNorm
1258
  output_format: SAME
1259
  weight_format: SAME
1260
  transformer.h.24.ln_2:
1261
- approximation_function: LAYERNORM(fallback,4,float16)
1262
  bias_format: SAME
1263
  input_format: SAME
1264
  instance: LayerNorm
1265
  output_format: SAME
1266
  weight_format: SAME
1267
  transformer.h.24.mlp.act:
1268
- approximation_function: GELU(poly2,float16)
1269
  input_format: SAME
1270
  instance: GELU
1271
  output_format: SAME
@@ -1317,26 +1317,26 @@ model:
1317
  instance: Dropout
1318
  output_format: SAME
1319
  transformer.h.25.attn.softmax:
1320
- approximation_function: SOFTMAX(base2,float16)
1321
  input_format: SAME
1322
  instance: Softmax
1323
  output_format: SAME
1324
  transformer.h.25.ln_1:
1325
- approximation_function: LAYERNORM(fallback,4,float16)
1326
  bias_format: SAME
1327
  input_format: SAME
1328
  instance: LayerNorm
1329
  output_format: SAME
1330
  weight_format: SAME
1331
  transformer.h.25.ln_2:
1332
- approximation_function: LAYERNORM(fallback,4,float16)
1333
  bias_format: SAME
1334
  input_format: SAME
1335
  instance: LayerNorm
1336
  output_format: SAME
1337
  weight_format: SAME
1338
  transformer.h.25.mlp.act:
1339
- approximation_function: GELU(poly2,float16)
1340
  input_format: SAME
1341
  instance: GELU
1342
  output_format: SAME
@@ -1388,26 +1388,26 @@ model:
1388
  instance: Dropout
1389
  output_format: SAME
1390
  transformer.h.26.attn.softmax:
1391
- approximation_function: SOFTMAX(base2,float16)
1392
  input_format: SAME
1393
  instance: Softmax
1394
  output_format: SAME
1395
  transformer.h.26.ln_1:
1396
- approximation_function: LAYERNORM(fallback,4,float16)
1397
  bias_format: SAME
1398
  input_format: SAME
1399
  instance: LayerNorm
1400
  output_format: SAME
1401
  weight_format: SAME
1402
  transformer.h.26.ln_2:
1403
- approximation_function: LAYERNORM(fallback,4,float16)
1404
  bias_format: SAME
1405
  input_format: SAME
1406
  instance: LayerNorm
1407
  output_format: SAME
1408
  weight_format: SAME
1409
  transformer.h.26.mlp.act:
1410
- approximation_function: GELU(poly2,float16)
1411
  input_format: SAME
1412
  instance: GELU
1413
  output_format: SAME
@@ -1459,26 +1459,26 @@ model:
1459
  instance: Dropout
1460
  output_format: SAME
1461
  transformer.h.27.attn.softmax:
1462
- approximation_function: SOFTMAX(base2,float16)
1463
  input_format: SAME
1464
  instance: Softmax
1465
  output_format: SAME
1466
  transformer.h.27.ln_1:
1467
- approximation_function: LAYERNORM(fallback,4,float16)
1468
  bias_format: SAME
1469
  input_format: SAME
1470
  instance: LayerNorm
1471
  output_format: SAME
1472
  weight_format: SAME
1473
  transformer.h.27.ln_2:
1474
- approximation_function: LAYERNORM(fallback,4,float16)
1475
  bias_format: SAME
1476
  input_format: SAME
1477
  instance: LayerNorm
1478
  output_format: SAME
1479
  weight_format: SAME
1480
  transformer.h.27.mlp.act:
1481
- approximation_function: GELU(poly2,float16)
1482
  input_format: SAME
1483
  instance: GELU
1484
  output_format: SAME
@@ -1530,26 +1530,26 @@ model:
1530
  instance: Dropout
1531
  output_format: SAME
1532
  transformer.h.28.attn.softmax:
1533
- approximation_function: SOFTMAX(base2,float16)
1534
  input_format: SAME
1535
  instance: Softmax
1536
  output_format: SAME
1537
  transformer.h.28.ln_1:
1538
- approximation_function: LAYERNORM(fallback,4,float16)
1539
  bias_format: SAME
1540
  input_format: SAME
1541
  instance: LayerNorm
1542
  output_format: SAME
1543
  weight_format: SAME
1544
  transformer.h.28.ln_2:
1545
- approximation_function: LAYERNORM(fallback,4,float16)
1546
  bias_format: SAME
1547
  input_format: SAME
1548
  instance: LayerNorm
1549
  output_format: SAME
1550
  weight_format: SAME
1551
  transformer.h.28.mlp.act:
1552
- approximation_function: GELU(poly2,float16)
1553
  input_format: SAME
1554
  instance: GELU
1555
  output_format: SAME
@@ -1601,26 +1601,26 @@ model:
1601
  instance: Dropout
1602
  output_format: SAME
1603
  transformer.h.29.attn.softmax:
1604
- approximation_function: SOFTMAX(base2,float16)
1605
  input_format: SAME
1606
  instance: Softmax
1607
  output_format: SAME
1608
  transformer.h.29.ln_1:
1609
- approximation_function: LAYERNORM(fallback,4,float16)
1610
  bias_format: SAME
1611
  input_format: SAME
1612
  instance: LayerNorm
1613
  output_format: SAME
1614
  weight_format: SAME
1615
  transformer.h.29.ln_2:
1616
- approximation_function: LAYERNORM(fallback,4,float16)
1617
  bias_format: SAME
1618
  input_format: SAME
1619
  instance: LayerNorm
1620
  output_format: SAME
1621
  weight_format: SAME
1622
  transformer.h.29.mlp.act:
1623
- approximation_function: GELU(poly2,float16)
1624
  input_format: SAME
1625
  instance: GELU
1626
  output_format: SAME
@@ -1672,26 +1672,26 @@ model:
1672
  instance: Dropout
1673
  output_format: SAME
1674
  transformer.h.3.attn.softmax:
1675
- approximation_function: SOFTMAX(base2,float16)
1676
  input_format: SAME
1677
  instance: Softmax
1678
  output_format: SAME
1679
  transformer.h.3.ln_1:
1680
- approximation_function: LAYERNORM(fallback,4,float16)
1681
  bias_format: SAME
1682
  input_format: SAME
1683
  instance: LayerNorm
1684
  output_format: SAME
1685
  weight_format: SAME
1686
  transformer.h.3.ln_2:
1687
- approximation_function: LAYERNORM(fallback,4,float16)
1688
  bias_format: SAME
1689
  input_format: SAME
1690
  instance: LayerNorm
1691
  output_format: SAME
1692
  weight_format: SAME
1693
  transformer.h.3.mlp.act:
1694
- approximation_function: GELU(poly2,float16)
1695
  input_format: SAME
1696
  instance: GELU
1697
  output_format: SAME
@@ -1743,26 +1743,26 @@ model:
1743
  instance: Dropout
1744
  output_format: SAME
1745
  transformer.h.30.attn.softmax:
1746
- approximation_function: SOFTMAX(base2,float16)
1747
  input_format: SAME
1748
  instance: Softmax
1749
  output_format: SAME
1750
  transformer.h.30.ln_1:
1751
- approximation_function: LAYERNORM(fallback,4,float16)
1752
  bias_format: SAME
1753
  input_format: SAME
1754
  instance: LayerNorm
1755
  output_format: SAME
1756
  weight_format: SAME
1757
  transformer.h.30.ln_2:
1758
- approximation_function: LAYERNORM(fallback,4,float16)
1759
  bias_format: SAME
1760
  input_format: SAME
1761
  instance: LayerNorm
1762
  output_format: SAME
1763
  weight_format: SAME
1764
  transformer.h.30.mlp.act:
1765
- approximation_function: GELU(poly2,float16)
1766
  input_format: SAME
1767
  instance: GELU
1768
  output_format: SAME
@@ -1814,26 +1814,26 @@ model:
1814
  instance: Dropout
1815
  output_format: SAME
1816
  transformer.h.31.attn.softmax:
1817
- approximation_function: SOFTMAX(base2,float16)
1818
  input_format: SAME
1819
  instance: Softmax
1820
  output_format: SAME
1821
  transformer.h.31.ln_1:
1822
- approximation_function: LAYERNORM(fallback,4,float16)
1823
  bias_format: SAME
1824
  input_format: SAME
1825
  instance: LayerNorm
1826
  output_format: SAME
1827
  weight_format: SAME
1828
  transformer.h.31.ln_2:
1829
- approximation_function: LAYERNORM(fallback,4,float16)
1830
  bias_format: SAME
1831
  input_format: SAME
1832
  instance: LayerNorm
1833
  output_format: SAME
1834
  weight_format: SAME
1835
  transformer.h.31.mlp.act:
1836
- approximation_function: GELU(poly2,float16)
1837
  input_format: SAME
1838
  instance: GELU
1839
  output_format: SAME
@@ -1885,26 +1885,26 @@ model:
1885
  instance: Dropout
1886
  output_format: SAME
1887
  transformer.h.32.attn.softmax:
1888
- approximation_function: SOFTMAX(base2,float16)
1889
  input_format: SAME
1890
  instance: Softmax
1891
  output_format: SAME
1892
  transformer.h.32.ln_1:
1893
- approximation_function: LAYERNORM(fallback,4,float16)
1894
  bias_format: SAME
1895
  input_format: SAME
1896
  instance: LayerNorm
1897
  output_format: SAME
1898
  weight_format: SAME
1899
  transformer.h.32.ln_2:
1900
- approximation_function: LAYERNORM(fallback,4,float16)
1901
  bias_format: SAME
1902
  input_format: SAME
1903
  instance: LayerNorm
1904
  output_format: SAME
1905
  weight_format: SAME
1906
  transformer.h.32.mlp.act:
1907
- approximation_function: GELU(poly2,float16)
1908
  input_format: SAME
1909
  instance: GELU
1910
  output_format: SAME
@@ -1956,26 +1956,26 @@ model:
1956
  instance: Dropout
1957
  output_format: SAME
1958
  transformer.h.33.attn.softmax:
1959
- approximation_function: SOFTMAX(base2,float16)
1960
  input_format: SAME
1961
  instance: Softmax
1962
  output_format: SAME
1963
  transformer.h.33.ln_1:
1964
- approximation_function: LAYERNORM(fallback,4,float16)
1965
  bias_format: SAME
1966
  input_format: SAME
1967
  instance: LayerNorm
1968
  output_format: SAME
1969
  weight_format: SAME
1970
  transformer.h.33.ln_2:
1971
- approximation_function: LAYERNORM(fallback,4,float16)
1972
  bias_format: SAME
1973
  input_format: SAME
1974
  instance: LayerNorm
1975
  output_format: SAME
1976
  weight_format: SAME
1977
  transformer.h.33.mlp.act:
1978
- approximation_function: GELU(poly2,float16)
1979
  input_format: SAME
1980
  instance: GELU
1981
  output_format: SAME
@@ -2027,26 +2027,26 @@ model:
2027
  instance: Dropout
2028
  output_format: SAME
2029
  transformer.h.34.attn.softmax:
2030
- approximation_function: SOFTMAX(base2,float16)
2031
  input_format: SAME
2032
  instance: Softmax
2033
  output_format: SAME
2034
  transformer.h.34.ln_1:
2035
- approximation_function: LAYERNORM(fallback,4,float16)
2036
  bias_format: SAME
2037
  input_format: SAME
2038
  instance: LayerNorm
2039
  output_format: SAME
2040
  weight_format: SAME
2041
  transformer.h.34.ln_2:
2042
- approximation_function: LAYERNORM(fallback,4,float16)
2043
  bias_format: SAME
2044
  input_format: SAME
2045
  instance: LayerNorm
2046
  output_format: SAME
2047
  weight_format: SAME
2048
  transformer.h.34.mlp.act:
2049
- approximation_function: GELU(poly2,float16)
2050
  input_format: SAME
2051
  instance: GELU
2052
  output_format: SAME
@@ -2098,26 +2098,26 @@ model:
2098
  instance: Dropout
2099
  output_format: SAME
2100
  transformer.h.35.attn.softmax:
2101
- approximation_function: SOFTMAX(base2,float16)
2102
  input_format: SAME
2103
  instance: Softmax
2104
  output_format: SAME
2105
  transformer.h.35.ln_1:
2106
- approximation_function: LAYERNORM(fallback,4,float16)
2107
  bias_format: SAME
2108
  input_format: SAME
2109
  instance: LayerNorm
2110
  output_format: SAME
2111
  weight_format: SAME
2112
  transformer.h.35.ln_2:
2113
- approximation_function: LAYERNORM(fallback,4,float16)
2114
  bias_format: SAME
2115
  input_format: SAME
2116
  instance: LayerNorm
2117
  output_format: SAME
2118
  weight_format: SAME
2119
  transformer.h.35.mlp.act:
2120
- approximation_function: GELU(poly2,float16)
2121
  input_format: SAME
2122
  instance: GELU
2123
  output_format: SAME
@@ -2169,26 +2169,26 @@ model:
2169
  instance: Dropout
2170
  output_format: SAME
2171
  transformer.h.36.attn.softmax:
2172
- approximation_function: SOFTMAX(base2,float16)
2173
  input_format: SAME
2174
  instance: Softmax
2175
  output_format: SAME
2176
  transformer.h.36.ln_1:
2177
- approximation_function: LAYERNORM(fallback,4,float16)
2178
  bias_format: SAME
2179
  input_format: SAME
2180
  instance: LayerNorm
2181
  output_format: SAME
2182
  weight_format: SAME
2183
  transformer.h.36.ln_2:
2184
- approximation_function: LAYERNORM(fallback,4,float16)
2185
  bias_format: SAME
2186
  input_format: SAME
2187
  instance: LayerNorm
2188
  output_format: SAME
2189
  weight_format: SAME
2190
  transformer.h.36.mlp.act:
2191
- approximation_function: GELU(poly2,float16)
2192
  input_format: SAME
2193
  instance: GELU
2194
  output_format: SAME
@@ -2240,26 +2240,26 @@ model:
2240
  instance: Dropout
2241
  output_format: SAME
2242
  transformer.h.37.attn.softmax:
2243
- approximation_function: SOFTMAX(base2,float16)
2244
  input_format: SAME
2245
  instance: Softmax
2246
  output_format: SAME
2247
  transformer.h.37.ln_1:
2248
- approximation_function: LAYERNORM(fallback,4,float16)
2249
  bias_format: SAME
2250
  input_format: SAME
2251
  instance: LayerNorm
2252
  output_format: SAME
2253
  weight_format: SAME
2254
  transformer.h.37.ln_2:
2255
- approximation_function: LAYERNORM(fallback,4,float16)
2256
  bias_format: SAME
2257
  input_format: SAME
2258
  instance: LayerNorm
2259
  output_format: SAME
2260
  weight_format: SAME
2261
  transformer.h.37.mlp.act:
2262
- approximation_function: GELU(poly2,float16)
2263
  input_format: SAME
2264
  instance: GELU
2265
  output_format: SAME
@@ -2311,26 +2311,26 @@ model:
2311
  instance: Dropout
2312
  output_format: SAME
2313
  transformer.h.38.attn.softmax:
2314
- approximation_function: SOFTMAX(base2,float16)
2315
  input_format: SAME
2316
  instance: Softmax
2317
  output_format: SAME
2318
  transformer.h.38.ln_1:
2319
- approximation_function: LAYERNORM(fallback,4,float16)
2320
  bias_format: SAME
2321
  input_format: SAME
2322
  instance: LayerNorm
2323
  output_format: SAME
2324
  weight_format: SAME
2325
  transformer.h.38.ln_2:
2326
- approximation_function: LAYERNORM(fallback,4,float16)
2327
  bias_format: SAME
2328
  input_format: SAME
2329
  instance: LayerNorm
2330
  output_format: SAME
2331
  weight_format: SAME
2332
  transformer.h.38.mlp.act:
2333
- approximation_function: GELU(poly2,float16)
2334
  input_format: SAME
2335
  instance: GELU
2336
  output_format: SAME
@@ -2382,26 +2382,26 @@ model:
2382
  instance: Dropout
2383
  output_format: SAME
2384
  transformer.h.39.attn.softmax:
2385
- approximation_function: SOFTMAX(base2,float16)
2386
  input_format: SAME
2387
  instance: Softmax
2388
  output_format: SAME
2389
  transformer.h.39.ln_1:
2390
- approximation_function: LAYERNORM(fallback,4,float16)
2391
  bias_format: SAME
2392
  input_format: SAME
2393
  instance: LayerNorm
2394
  output_format: SAME
2395
  weight_format: SAME
2396
  transformer.h.39.ln_2:
2397
- approximation_function: LAYERNORM(fallback,4,float16)
2398
  bias_format: SAME
2399
  input_format: SAME
2400
  instance: LayerNorm
2401
  output_format: SAME
2402
  weight_format: SAME
2403
  transformer.h.39.mlp.act:
2404
- approximation_function: GELU(poly2,float16)
2405
  input_format: SAME
2406
  instance: GELU
2407
  output_format: SAME
@@ -2453,26 +2453,26 @@ model:
2453
  instance: Dropout
2454
  output_format: SAME
2455
  transformer.h.4.attn.softmax:
2456
- approximation_function: SOFTMAX(base2,float16)
2457
  input_format: SAME
2458
  instance: Softmax
2459
  output_format: SAME
2460
  transformer.h.4.ln_1:
2461
- approximation_function: LAYERNORM(fallback,4,float16)
2462
  bias_format: SAME
2463
  input_format: SAME
2464
  instance: LayerNorm
2465
  output_format: SAME
2466
  weight_format: SAME
2467
  transformer.h.4.ln_2:
2468
- approximation_function: LAYERNORM(fallback,4,float16)
2469
  bias_format: SAME
2470
  input_format: SAME
2471
  instance: LayerNorm
2472
  output_format: SAME
2473
  weight_format: SAME
2474
  transformer.h.4.mlp.act:
2475
- approximation_function: GELU(poly2,float16)
2476
  input_format: SAME
2477
  instance: GELU
2478
  output_format: SAME
@@ -2524,26 +2524,26 @@ model:
2524
  instance: Dropout
2525
  output_format: SAME
2526
  transformer.h.40.attn.softmax:
2527
- approximation_function: SOFTMAX(base2,float16)
2528
  input_format: SAME
2529
  instance: Softmax
2530
  output_format: SAME
2531
  transformer.h.40.ln_1:
2532
- approximation_function: LAYERNORM(fallback,4,float16)
2533
  bias_format: SAME
2534
  input_format: SAME
2535
  instance: LayerNorm
2536
  output_format: SAME
2537
  weight_format: SAME
2538
  transformer.h.40.ln_2:
2539
- approximation_function: LAYERNORM(fallback,4,float16)
2540
  bias_format: SAME
2541
  input_format: SAME
2542
  instance: LayerNorm
2543
  output_format: SAME
2544
  weight_format: SAME
2545
  transformer.h.40.mlp.act:
2546
- approximation_function: GELU(poly2,float16)
2547
  input_format: SAME
2548
  instance: GELU
2549
  output_format: SAME
@@ -2595,26 +2595,26 @@ model:
2595
  instance: Dropout
2596
  output_format: SAME
2597
  transformer.h.41.attn.softmax:
2598
- approximation_function: SOFTMAX(base2,float16)
2599
  input_format: SAME
2600
  instance: Softmax
2601
  output_format: SAME
2602
  transformer.h.41.ln_1:
2603
- approximation_function: LAYERNORM(fallback,4,float16)
2604
  bias_format: SAME
2605
  input_format: SAME
2606
  instance: LayerNorm
2607
  output_format: SAME
2608
  weight_format: SAME
2609
  transformer.h.41.ln_2:
2610
- approximation_function: LAYERNORM(fallback,4,float16)
2611
  bias_format: SAME
2612
  input_format: SAME
2613
  instance: LayerNorm
2614
  output_format: SAME
2615
  weight_format: SAME
2616
  transformer.h.41.mlp.act:
2617
- approximation_function: GELU(poly2,float16)
2618
  input_format: SAME
2619
  instance: GELU
2620
  output_format: SAME
@@ -2666,26 +2666,26 @@ model:
2666
  instance: Dropout
2667
  output_format: SAME
2668
  transformer.h.42.attn.softmax:
2669
- approximation_function: SOFTMAX(base2,float16)
2670
  input_format: SAME
2671
  instance: Softmax
2672
  output_format: SAME
2673
  transformer.h.42.ln_1:
2674
- approximation_function: LAYERNORM(fallback,4,float16)
2675
  bias_format: SAME
2676
  input_format: SAME
2677
  instance: LayerNorm
2678
  output_format: SAME
2679
  weight_format: SAME
2680
  transformer.h.42.ln_2:
2681
- approximation_function: LAYERNORM(fallback,4,float16)
2682
  bias_format: SAME
2683
  input_format: SAME
2684
  instance: LayerNorm
2685
  output_format: SAME
2686
  weight_format: SAME
2687
  transformer.h.42.mlp.act:
2688
- approximation_function: GELU(poly2,float16)
2689
  input_format: SAME
2690
  instance: GELU
2691
  output_format: SAME
@@ -2737,26 +2737,26 @@ model:
2737
  instance: Dropout
2738
  output_format: SAME
2739
  transformer.h.43.attn.softmax:
2740
- approximation_function: SOFTMAX(base2,float16)
2741
  input_format: SAME
2742
  instance: Softmax
2743
  output_format: SAME
2744
  transformer.h.43.ln_1:
2745
- approximation_function: LAYERNORM(fallback,4,float16)
2746
  bias_format: SAME
2747
  input_format: SAME
2748
  instance: LayerNorm
2749
  output_format: SAME
2750
  weight_format: SAME
2751
  transformer.h.43.ln_2:
2752
- approximation_function: LAYERNORM(fallback,4,float16)
2753
  bias_format: SAME
2754
  input_format: SAME
2755
  instance: LayerNorm
2756
  output_format: SAME
2757
  weight_format: SAME
2758
  transformer.h.43.mlp.act:
2759
- approximation_function: GELU(poly2,float16)
2760
  input_format: SAME
2761
  instance: GELU
2762
  output_format: SAME
@@ -2808,26 +2808,26 @@ model:
2808
  instance: Dropout
2809
  output_format: SAME
2810
  transformer.h.44.attn.softmax:
2811
- approximation_function: SOFTMAX(base2,float16)
2812
  input_format: SAME
2813
  instance: Softmax
2814
  output_format: SAME
2815
  transformer.h.44.ln_1:
2816
- approximation_function: LAYERNORM(fallback,4,float16)
2817
  bias_format: SAME
2818
  input_format: SAME
2819
  instance: LayerNorm
2820
  output_format: SAME
2821
  weight_format: SAME
2822
  transformer.h.44.ln_2:
2823
- approximation_function: LAYERNORM(fallback,4,float16)
2824
  bias_format: SAME
2825
  input_format: SAME
2826
  instance: LayerNorm
2827
  output_format: SAME
2828
  weight_format: SAME
2829
  transformer.h.44.mlp.act:
2830
- approximation_function: GELU(poly2,float16)
2831
  input_format: SAME
2832
  instance: GELU
2833
  output_format: SAME
@@ -2879,26 +2879,26 @@ model:
2879
  instance: Dropout
2880
  output_format: SAME
2881
  transformer.h.45.attn.softmax:
2882
- approximation_function: SOFTMAX(base2,float16)
2883
  input_format: SAME
2884
  instance: Softmax
2885
  output_format: SAME
2886
  transformer.h.45.ln_1:
2887
- approximation_function: LAYERNORM(fallback,4,float16)
2888
  bias_format: SAME
2889
  input_format: SAME
2890
  instance: LayerNorm
2891
  output_format: SAME
2892
  weight_format: SAME
2893
  transformer.h.45.ln_2:
2894
- approximation_function: LAYERNORM(fallback,4,float16)
2895
  bias_format: SAME
2896
  input_format: SAME
2897
  instance: LayerNorm
2898
  output_format: SAME
2899
  weight_format: SAME
2900
  transformer.h.45.mlp.act:
2901
- approximation_function: GELU(poly2,float16)
2902
  input_format: SAME
2903
  instance: GELU
2904
  output_format: SAME
@@ -2950,26 +2950,26 @@ model:
2950
  instance: Dropout
2951
  output_format: SAME
2952
  transformer.h.46.attn.softmax:
2953
- approximation_function: SOFTMAX(base2,float16)
2954
  input_format: SAME
2955
  instance: Softmax
2956
  output_format: SAME
2957
  transformer.h.46.ln_1:
2958
- approximation_function: LAYERNORM(fallback,4,float16)
2959
  bias_format: SAME
2960
  input_format: SAME
2961
  instance: LayerNorm
2962
  output_format: SAME
2963
  weight_format: SAME
2964
  transformer.h.46.ln_2:
2965
- approximation_function: LAYERNORM(fallback,4,float16)
2966
  bias_format: SAME
2967
  input_format: SAME
2968
  instance: LayerNorm
2969
  output_format: SAME
2970
  weight_format: SAME
2971
  transformer.h.46.mlp.act:
2972
- approximation_function: GELU(poly2,float16)
2973
  input_format: SAME
2974
  instance: GELU
2975
  output_format: SAME
@@ -3021,26 +3021,26 @@ model:
3021
  instance: Dropout
3022
  output_format: SAME
3023
  transformer.h.47.attn.softmax:
3024
- approximation_function: SOFTMAX(base2,float16)
3025
  input_format: SAME
3026
  instance: Softmax
3027
  output_format: SAME
3028
  transformer.h.47.ln_1:
3029
- approximation_function: LAYERNORM(fallback,4,float16)
3030
  bias_format: SAME
3031
  input_format: SAME
3032
  instance: LayerNorm
3033
  output_format: SAME
3034
  weight_format: SAME
3035
  transformer.h.47.ln_2:
3036
- approximation_function: LAYERNORM(fallback,4,float16)
3037
  bias_format: SAME
3038
  input_format: SAME
3039
  instance: LayerNorm
3040
  output_format: SAME
3041
  weight_format: SAME
3042
  transformer.h.47.mlp.act:
3043
- approximation_function: GELU(poly2,float16)
3044
  input_format: SAME
3045
  instance: GELU
3046
  output_format: SAME
@@ -3092,26 +3092,26 @@ model:
3092
  instance: Dropout
3093
  output_format: SAME
3094
  transformer.h.5.attn.softmax:
3095
- approximation_function: SOFTMAX(base2,float16)
3096
  input_format: SAME
3097
  instance: Softmax
3098
  output_format: SAME
3099
  transformer.h.5.ln_1:
3100
- approximation_function: LAYERNORM(fallback,4,float16)
3101
  bias_format: SAME
3102
  input_format: SAME
3103
  instance: LayerNorm
3104
  output_format: SAME
3105
  weight_format: SAME
3106
  transformer.h.5.ln_2:
3107
- approximation_function: LAYERNORM(fallback,4,float16)
3108
  bias_format: SAME
3109
  input_format: SAME
3110
  instance: LayerNorm
3111
  output_format: SAME
3112
  weight_format: SAME
3113
  transformer.h.5.mlp.act:
3114
- approximation_function: GELU(poly2,float16)
3115
  input_format: SAME
3116
  instance: GELU
3117
  output_format: SAME
@@ -3163,26 +3163,26 @@ model:
3163
  instance: Dropout
3164
  output_format: SAME
3165
  transformer.h.6.attn.softmax:
3166
- approximation_function: SOFTMAX(base2,float16)
3167
  input_format: SAME
3168
  instance: Softmax
3169
  output_format: SAME
3170
  transformer.h.6.ln_1:
3171
- approximation_function: LAYERNORM(fallback,4,float16)
3172
  bias_format: SAME
3173
  input_format: SAME
3174
  instance: LayerNorm
3175
  output_format: SAME
3176
  weight_format: SAME
3177
  transformer.h.6.ln_2:
3178
- approximation_function: LAYERNORM(fallback,4,float16)
3179
  bias_format: SAME
3180
  input_format: SAME
3181
  instance: LayerNorm
3182
  output_format: SAME
3183
  weight_format: SAME
3184
  transformer.h.6.mlp.act:
3185
- approximation_function: GELU(poly2,float16)
3186
  input_format: SAME
3187
  instance: GELU
3188
  output_format: SAME
@@ -3234,26 +3234,26 @@ model:
3234
  instance: Dropout
3235
  output_format: SAME
3236
  transformer.h.7.attn.softmax:
3237
- approximation_function: SOFTMAX(base2,float16)
3238
  input_format: SAME
3239
  instance: Softmax
3240
  output_format: SAME
3241
  transformer.h.7.ln_1:
3242
- approximation_function: LAYERNORM(fallback,4,float16)
3243
  bias_format: SAME
3244
  input_format: SAME
3245
  instance: LayerNorm
3246
  output_format: SAME
3247
  weight_format: SAME
3248
  transformer.h.7.ln_2:
3249
- approximation_function: LAYERNORM(fallback,4,float16)
3250
  bias_format: SAME
3251
  input_format: SAME
3252
  instance: LayerNorm
3253
  output_format: SAME
3254
  weight_format: SAME
3255
  transformer.h.7.mlp.act:
3256
- approximation_function: GELU(poly2,float16)
3257
  input_format: SAME
3258
  instance: GELU
3259
  output_format: SAME
@@ -3305,26 +3305,26 @@ model:
3305
  instance: Dropout
3306
  output_format: SAME
3307
  transformer.h.8.attn.softmax:
3308
- approximation_function: SOFTMAX(base2,float16)
3309
  input_format: SAME
3310
  instance: Softmax
3311
  output_format: SAME
3312
  transformer.h.8.ln_1:
3313
- approximation_function: LAYERNORM(fallback,4,float16)
3314
  bias_format: SAME
3315
  input_format: SAME
3316
  instance: LayerNorm
3317
  output_format: SAME
3318
  weight_format: SAME
3319
  transformer.h.8.ln_2:
3320
- approximation_function: LAYERNORM(fallback,4,float16)
3321
  bias_format: SAME
3322
  input_format: SAME
3323
  instance: LayerNorm
3324
  output_format: SAME
3325
  weight_format: SAME
3326
  transformer.h.8.mlp.act:
3327
- approximation_function: GELU(poly2,float16)
3328
  input_format: SAME
3329
  instance: GELU
3330
  output_format: SAME
@@ -3376,26 +3376,26 @@ model:
3376
  instance: Dropout
3377
  output_format: SAME
3378
  transformer.h.9.attn.softmax:
3379
- approximation_function: SOFTMAX(base2,float16)
3380
  input_format: SAME
3381
  instance: Softmax
3382
  output_format: SAME
3383
  transformer.h.9.ln_1:
3384
- approximation_function: LAYERNORM(fallback,4,float16)
3385
  bias_format: SAME
3386
  input_format: SAME
3387
  instance: LayerNorm
3388
  output_format: SAME
3389
  weight_format: SAME
3390
  transformer.h.9.ln_2:
3391
- approximation_function: LAYERNORM(fallback,4,float16)
3392
  bias_format: SAME
3393
  input_format: SAME
3394
  instance: LayerNorm
3395
  output_format: SAME
3396
  weight_format: SAME
3397
  transformer.h.9.mlp.act:
3398
- approximation_function: GELU(poly2,float16)
3399
  input_format: SAME
3400
  instance: GELU
3401
  output_format: SAME
@@ -3421,7 +3421,7 @@ model:
3421
  instance: Dropout
3422
  output_format: SAME
3423
  transformer.ln_f:
3424
- approximation_function: LAYERNORM(fallback,4,float16)
3425
  bias_format: SAME
3426
  input_format: SAME
3427
  instance: LayerNorm
 
39
  instance: Dropout
40
  output_format: SAME
41
  transformer.h.0.attn.softmax:
42
+ approximation_function: NONE
43
  input_format: SAME
44
  instance: Softmax
45
  output_format: SAME
46
  transformer.h.0.ln_1:
47
+ approximation_function: NONE
48
  bias_format: SAME
49
  input_format: SAME
50
  instance: LayerNorm
51
  output_format: SAME
52
  weight_format: SAME
53
  transformer.h.0.ln_2:
54
+ approximation_function: NONE
55
  bias_format: SAME
56
  input_format: SAME
57
  instance: LayerNorm
58
  output_format: SAME
59
  weight_format: SAME
60
  transformer.h.0.mlp.act:
61
+ approximation_function: NONE
62
  input_format: SAME
63
  instance: GELU
64
  output_format: SAME
 
110
  instance: Dropout
111
  output_format: SAME
112
  transformer.h.1.attn.softmax:
113
+ approximation_function: NONE
114
  input_format: SAME
115
  instance: Softmax
116
  output_format: SAME
117
  transformer.h.1.ln_1:
118
+ approximation_function: NONE
119
  bias_format: SAME
120
  input_format: SAME
121
  instance: LayerNorm
122
  output_format: SAME
123
  weight_format: SAME
124
  transformer.h.1.ln_2:
125
+ approximation_function: NONE
126
  bias_format: SAME
127
  input_format: SAME
128
  instance: LayerNorm
129
  output_format: SAME
130
  weight_format: SAME
131
  transformer.h.1.mlp.act:
132
+ approximation_function: NONE
133
  input_format: SAME
134
  instance: GELU
135
  output_format: SAME
 
181
  instance: Dropout
182
  output_format: SAME
183
  transformer.h.10.attn.softmax:
184
+ approximation_function: NONE
185
  input_format: SAME
186
  instance: Softmax
187
  output_format: SAME
188
  transformer.h.10.ln_1:
189
+ approximation_function: NONE
190
  bias_format: SAME
191
  input_format: SAME
192
  instance: LayerNorm
193
  output_format: SAME
194
  weight_format: SAME
195
  transformer.h.10.ln_2:
196
+ approximation_function: NONE
197
  bias_format: SAME
198
  input_format: SAME
199
  instance: LayerNorm
200
  output_format: SAME
201
  weight_format: SAME
202
  transformer.h.10.mlp.act:
203
+ approximation_function: NONE
204
  input_format: SAME
205
  instance: GELU
206
  output_format: SAME
 
252
  instance: Dropout
253
  output_format: SAME
254
  transformer.h.11.attn.softmax:
255
+ approximation_function: NONE
256
  input_format: SAME
257
  instance: Softmax
258
  output_format: SAME
259
  transformer.h.11.ln_1:
260
+ approximation_function: NONE
261
  bias_format: SAME
262
  input_format: SAME
263
  instance: LayerNorm
264
  output_format: SAME
265
  weight_format: SAME
266
  transformer.h.11.ln_2:
267
+ approximation_function: NONE
268
  bias_format: SAME
269
  input_format: SAME
270
  instance: LayerNorm
271
  output_format: SAME
272
  weight_format: SAME
273
  transformer.h.11.mlp.act:
274
+ approximation_function: NONE
275
  input_format: SAME
276
  instance: GELU
277
  output_format: SAME
 
323
  instance: Dropout
324
  output_format: SAME
325
  transformer.h.12.attn.softmax:
326
+ approximation_function: NONE
327
  input_format: SAME
328
  instance: Softmax
329
  output_format: SAME
330
  transformer.h.12.ln_1:
331
+ approximation_function: NONE
332
  bias_format: SAME
333
  input_format: SAME
334
  instance: LayerNorm
335
  output_format: SAME
336
  weight_format: SAME
337
  transformer.h.12.ln_2:
338
+ approximation_function: NONE
339
  bias_format: SAME
340
  input_format: SAME
341
  instance: LayerNorm
342
  output_format: SAME
343
  weight_format: SAME
344
  transformer.h.12.mlp.act:
345
+ approximation_function: NONE
346
  input_format: SAME
347
  instance: GELU
348
  output_format: SAME
 
394
  instance: Dropout
395
  output_format: SAME
396
  transformer.h.13.attn.softmax:
397
+ approximation_function: NONE
398
  input_format: SAME
399
  instance: Softmax
400
  output_format: SAME
401
  transformer.h.13.ln_1:
402
+ approximation_function: NONE
403
  bias_format: SAME
404
  input_format: SAME
405
  instance: LayerNorm
406
  output_format: SAME
407
  weight_format: SAME
408
  transformer.h.13.ln_2:
409
+ approximation_function: NONE
410
  bias_format: SAME
411
  input_format: SAME
412
  instance: LayerNorm
413
  output_format: SAME
414
  weight_format: SAME
415
  transformer.h.13.mlp.act:
416
+ approximation_function: NONE
417
  input_format: SAME
418
  instance: GELU
419
  output_format: SAME
 
465
  instance: Dropout
466
  output_format: SAME
467
  transformer.h.14.attn.softmax:
468
+ approximation_function: NONE
469
  input_format: SAME
470
  instance: Softmax
471
  output_format: SAME
472
  transformer.h.14.ln_1:
473
+ approximation_function: NONE
474
  bias_format: SAME
475
  input_format: SAME
476
  instance: LayerNorm
477
  output_format: SAME
478
  weight_format: SAME
479
  transformer.h.14.ln_2:
480
+ approximation_function: NONE
481
  bias_format: SAME
482
  input_format: SAME
483
  instance: LayerNorm
484
  output_format: SAME
485
  weight_format: SAME
486
  transformer.h.14.mlp.act:
487
+ approximation_function: NONE
488
  input_format: SAME
489
  instance: GELU
490
  output_format: SAME
 
536
  instance: Dropout
537
  output_format: SAME
538
  transformer.h.15.attn.softmax:
539
+ approximation_function: NONE
540
  input_format: SAME
541
  instance: Softmax
542
  output_format: SAME
543
  transformer.h.15.ln_1:
544
+ approximation_function: NONE
545
  bias_format: SAME
546
  input_format: SAME
547
  instance: LayerNorm
548
  output_format: SAME
549
  weight_format: SAME
550
  transformer.h.15.ln_2:
551
+ approximation_function: NONE
552
  bias_format: SAME
553
  input_format: SAME
554
  instance: LayerNorm
555
  output_format: SAME
556
  weight_format: SAME
557
  transformer.h.15.mlp.act:
558
+ approximation_function: NONE
559
  input_format: SAME
560
  instance: GELU
561
  output_format: SAME
 
607
  instance: Dropout
608
  output_format: SAME
609
  transformer.h.16.attn.softmax:
610
+ approximation_function: NONE
611
  input_format: SAME
612
  instance: Softmax
613
  output_format: SAME
614
  transformer.h.16.ln_1:
615
+ approximation_function: NONE
616
  bias_format: SAME
617
  input_format: SAME
618
  instance: LayerNorm
619
  output_format: SAME
620
  weight_format: SAME
621
  transformer.h.16.ln_2:
622
+ approximation_function: NONE
623
  bias_format: SAME
624
  input_format: SAME
625
  instance: LayerNorm
626
  output_format: SAME
627
  weight_format: SAME
628
  transformer.h.16.mlp.act:
629
+ approximation_function: NONE
630
  input_format: SAME
631
  instance: GELU
632
  output_format: SAME
 
678
  instance: Dropout
679
  output_format: SAME
680
  transformer.h.17.attn.softmax:
681
+ approximation_function: NONE
682
  input_format: SAME
683
  instance: Softmax
684
  output_format: SAME
685
  transformer.h.17.ln_1:
686
+ approximation_function: NONE
687
  bias_format: SAME
688
  input_format: SAME
689
  instance: LayerNorm
690
  output_format: SAME
691
  weight_format: SAME
692
  transformer.h.17.ln_2:
693
+ approximation_function: NONE
694
  bias_format: SAME
695
  input_format: SAME
696
  instance: LayerNorm
697
  output_format: SAME
698
  weight_format: SAME
699
  transformer.h.17.mlp.act:
700
+ approximation_function: NONE
701
  input_format: SAME
702
  instance: GELU
703
  output_format: SAME
 
749
  instance: Dropout
750
  output_format: SAME
751
  transformer.h.18.attn.softmax:
752
+ approximation_function: NONE
753
  input_format: SAME
754
  instance: Softmax
755
  output_format: SAME
756
  transformer.h.18.ln_1:
757
+ approximation_function: NONE
758
  bias_format: SAME
759
  input_format: SAME
760
  instance: LayerNorm
761
  output_format: SAME
762
  weight_format: SAME
763
  transformer.h.18.ln_2:
764
+ approximation_function: NONE
765
  bias_format: SAME
766
  input_format: SAME
767
  instance: LayerNorm
768
  output_format: SAME
769
  weight_format: SAME
770
  transformer.h.18.mlp.act:
771
+ approximation_function: NONE
772
  input_format: SAME
773
  instance: GELU
774
  output_format: SAME
 
820
  instance: Dropout
821
  output_format: SAME
822
  transformer.h.19.attn.softmax:
823
+ approximation_function: NONE
824
  input_format: SAME
825
  instance: Softmax
826
  output_format: SAME
827
  transformer.h.19.ln_1:
828
+ approximation_function: NONE
829
  bias_format: SAME
830
  input_format: SAME
831
  instance: LayerNorm
832
  output_format: SAME
833
  weight_format: SAME
834
  transformer.h.19.ln_2:
835
+ approximation_function: NONE
836
  bias_format: SAME
837
  input_format: SAME
838
  instance: LayerNorm
839
  output_format: SAME
840
  weight_format: SAME
841
  transformer.h.19.mlp.act:
842
+ approximation_function: NONE
843
  input_format: SAME
844
  instance: GELU
845
  output_format: SAME
 
891
  instance: Dropout
892
  output_format: SAME
893
  transformer.h.2.attn.softmax:
894
+ approximation_function: NONE
895
  input_format: SAME
896
  instance: Softmax
897
  output_format: SAME
898
  transformer.h.2.ln_1:
899
+ approximation_function: NONE
900
  bias_format: SAME
901
  input_format: SAME
902
  instance: LayerNorm
903
  output_format: SAME
904
  weight_format: SAME
905
  transformer.h.2.ln_2:
906
+ approximation_function: NONE
907
  bias_format: SAME
908
  input_format: SAME
909
  instance: LayerNorm
910
  output_format: SAME
911
  weight_format: SAME
912
  transformer.h.2.mlp.act:
913
+ approximation_function: NONE
914
  input_format: SAME
915
  instance: GELU
916
  output_format: SAME
 
962
  instance: Dropout
963
  output_format: SAME
964
  transformer.h.20.attn.softmax:
965
+ approximation_function: NONE
966
  input_format: SAME
967
  instance: Softmax
968
  output_format: SAME
969
  transformer.h.20.ln_1:
970
+ approximation_function: NONE
971
  bias_format: SAME
972
  input_format: SAME
973
  instance: LayerNorm
974
  output_format: SAME
975
  weight_format: SAME
976
  transformer.h.20.ln_2:
977
+ approximation_function: NONE
978
  bias_format: SAME
979
  input_format: SAME
980
  instance: LayerNorm
981
  output_format: SAME
982
  weight_format: SAME
983
  transformer.h.20.mlp.act:
984
+ approximation_function: NONE
985
  input_format: SAME
986
  instance: GELU
987
  output_format: SAME
 
1033
  instance: Dropout
1034
  output_format: SAME
1035
  transformer.h.21.attn.softmax:
1036
+ approximation_function: NONE
1037
  input_format: SAME
1038
  instance: Softmax
1039
  output_format: SAME
1040
  transformer.h.21.ln_1:
1041
+ approximation_function: NONE
1042
  bias_format: SAME
1043
  input_format: SAME
1044
  instance: LayerNorm
1045
  output_format: SAME
1046
  weight_format: SAME
1047
  transformer.h.21.ln_2:
1048
+ approximation_function: NONE
1049
  bias_format: SAME
1050
  input_format: SAME
1051
  instance: LayerNorm
1052
  output_format: SAME
1053
  weight_format: SAME
1054
  transformer.h.21.mlp.act:
1055
+ approximation_function: NONE
1056
  input_format: SAME
1057
  instance: GELU
1058
  output_format: SAME
 
1104
  instance: Dropout
1105
  output_format: SAME
1106
  transformer.h.22.attn.softmax:
1107
+ approximation_function: NONE
1108
  input_format: SAME
1109
  instance: Softmax
1110
  output_format: SAME
1111
  transformer.h.22.ln_1:
1112
+ approximation_function: NONE
1113
  bias_format: SAME
1114
  input_format: SAME
1115
  instance: LayerNorm
1116
  output_format: SAME
1117
  weight_format: SAME
1118
  transformer.h.22.ln_2:
1119
+ approximation_function: NONE
1120
  bias_format: SAME
1121
  input_format: SAME
1122
  instance: LayerNorm
1123
  output_format: SAME
1124
  weight_format: SAME
1125
  transformer.h.22.mlp.act:
1126
+ approximation_function: NONE
1127
  input_format: SAME
1128
  instance: GELU
1129
  output_format: SAME
 
1175
  instance: Dropout
1176
  output_format: SAME
1177
  transformer.h.23.attn.softmax:
1178
+ approximation_function: NONE
1179
  input_format: SAME
1180
  instance: Softmax
1181
  output_format: SAME
1182
  transformer.h.23.ln_1:
1183
+ approximation_function: NONE
1184
  bias_format: SAME
1185
  input_format: SAME
1186
  instance: LayerNorm
1187
  output_format: SAME
1188
  weight_format: SAME
1189
  transformer.h.23.ln_2:
1190
+ approximation_function: NONE
1191
  bias_format: SAME
1192
  input_format: SAME
1193
  instance: LayerNorm
1194
  output_format: SAME
1195
  weight_format: SAME
1196
  transformer.h.23.mlp.act:
1197
+ approximation_function: NONE
1198
  input_format: SAME
1199
  instance: GELU
1200
  output_format: SAME
 
1246
  instance: Dropout
1247
  output_format: SAME
1248
  transformer.h.24.attn.softmax:
1249
+ approximation_function: NONE
1250
  input_format: SAME
1251
  instance: Softmax
1252
  output_format: SAME
1253
  transformer.h.24.ln_1:
1254
+ approximation_function: NONE
1255
  bias_format: SAME
1256
  input_format: SAME
1257
  instance: LayerNorm
1258
  output_format: SAME
1259
  weight_format: SAME
1260
  transformer.h.24.ln_2:
1261
+ approximation_function: NONE
1262
  bias_format: SAME
1263
  input_format: SAME
1264
  instance: LayerNorm
1265
  output_format: SAME
1266
  weight_format: SAME
1267
  transformer.h.24.mlp.act:
1268
+ approximation_function: NONE
1269
  input_format: SAME
1270
  instance: GELU
1271
  output_format: SAME
 
1317
  instance: Dropout
1318
  output_format: SAME
1319
  transformer.h.25.attn.softmax:
1320
+ approximation_function: NONE
1321
  input_format: SAME
1322
  instance: Softmax
1323
  output_format: SAME
1324
  transformer.h.25.ln_1:
1325
+ approximation_function: NONE
1326
  bias_format: SAME
1327
  input_format: SAME
1328
  instance: LayerNorm
1329
  output_format: SAME
1330
  weight_format: SAME
1331
  transformer.h.25.ln_2:
1332
+ approximation_function: NONE
1333
  bias_format: SAME
1334
  input_format: SAME
1335
  instance: LayerNorm
1336
  output_format: SAME
1337
  weight_format: SAME
1338
  transformer.h.25.mlp.act:
1339
+ approximation_function: NONE
1340
  input_format: SAME
1341
  instance: GELU
1342
  output_format: SAME
 
1388
  instance: Dropout
1389
  output_format: SAME
1390
  transformer.h.26.attn.softmax:
1391
+ approximation_function: NONE
1392
  input_format: SAME
1393
  instance: Softmax
1394
  output_format: SAME
1395
  transformer.h.26.ln_1:
1396
+ approximation_function: NONE
1397
  bias_format: SAME
1398
  input_format: SAME
1399
  instance: LayerNorm
1400
  output_format: SAME
1401
  weight_format: SAME
1402
  transformer.h.26.ln_2:
1403
+ approximation_function: NONE
1404
  bias_format: SAME
1405
  input_format: SAME
1406
  instance: LayerNorm
1407
  output_format: SAME
1408
  weight_format: SAME
1409
  transformer.h.26.mlp.act:
1410
+ approximation_function: NONE
1411
  input_format: SAME
1412
  instance: GELU
1413
  output_format: SAME
 
1459
  instance: Dropout
1460
  output_format: SAME
1461
  transformer.h.27.attn.softmax:
1462
+ approximation_function: NONE
1463
  input_format: SAME
1464
  instance: Softmax
1465
  output_format: SAME
1466
  transformer.h.27.ln_1:
1467
+ approximation_function: NONE
1468
  bias_format: SAME
1469
  input_format: SAME
1470
  instance: LayerNorm
1471
  output_format: SAME
1472
  weight_format: SAME
1473
  transformer.h.27.ln_2:
1474
+ approximation_function: NONE
1475
  bias_format: SAME
1476
  input_format: SAME
1477
  instance: LayerNorm
1478
  output_format: SAME
1479
  weight_format: SAME
1480
  transformer.h.27.mlp.act:
1481
+ approximation_function: NONE
1482
  input_format: SAME
1483
  instance: GELU
1484
  output_format: SAME
 
1530
  instance: Dropout
1531
  output_format: SAME
1532
  transformer.h.28.attn.softmax:
1533
+ approximation_function: NONE
1534
  input_format: SAME
1535
  instance: Softmax
1536
  output_format: SAME
1537
  transformer.h.28.ln_1:
1538
+ approximation_function: NONE
1539
  bias_format: SAME
1540
  input_format: SAME
1541
  instance: LayerNorm
1542
  output_format: SAME
1543
  weight_format: SAME
1544
  transformer.h.28.ln_2:
1545
+ approximation_function: NONE
1546
  bias_format: SAME
1547
  input_format: SAME
1548
  instance: LayerNorm
1549
  output_format: SAME
1550
  weight_format: SAME
1551
  transformer.h.28.mlp.act:
1552
+ approximation_function: NONE
1553
  input_format: SAME
1554
  instance: GELU
1555
  output_format: SAME
 
1601
  instance: Dropout
1602
  output_format: SAME
1603
  transformer.h.29.attn.softmax:
1604
+ approximation_function: NONE
1605
  input_format: SAME
1606
  instance: Softmax
1607
  output_format: SAME
1608
  transformer.h.29.ln_1:
1609
+ approximation_function: NONE
1610
  bias_format: SAME
1611
  input_format: SAME
1612
  instance: LayerNorm
1613
  output_format: SAME
1614
  weight_format: SAME
1615
  transformer.h.29.ln_2:
1616
+ approximation_function: NONE
1617
  bias_format: SAME
1618
  input_format: SAME
1619
  instance: LayerNorm
1620
  output_format: SAME
1621
  weight_format: SAME
1622
  transformer.h.29.mlp.act:
1623
+ approximation_function: NONE
1624
  input_format: SAME
1625
  instance: GELU
1626
  output_format: SAME
 
1672
  instance: Dropout
1673
  output_format: SAME
1674
  transformer.h.3.attn.softmax:
1675
+ approximation_function: NONE
1676
  input_format: SAME
1677
  instance: Softmax
1678
  output_format: SAME
1679
  transformer.h.3.ln_1:
1680
+ approximation_function: NONE
1681
  bias_format: SAME
1682
  input_format: SAME
1683
  instance: LayerNorm
1684
  output_format: SAME
1685
  weight_format: SAME
1686
  transformer.h.3.ln_2:
1687
+ approximation_function: NONE
1688
  bias_format: SAME
1689
  input_format: SAME
1690
  instance: LayerNorm
1691
  output_format: SAME
1692
  weight_format: SAME
1693
  transformer.h.3.mlp.act:
1694
+ approximation_function: NONE
1695
  input_format: SAME
1696
  instance: GELU
1697
  output_format: SAME
 
1743
  instance: Dropout
1744
  output_format: SAME
1745
  transformer.h.30.attn.softmax:
1746
+ approximation_function: NONE
1747
  input_format: SAME
1748
  instance: Softmax
1749
  output_format: SAME
1750
  transformer.h.30.ln_1:
1751
+ approximation_function: NONE
1752
  bias_format: SAME
1753
  input_format: SAME
1754
  instance: LayerNorm
1755
  output_format: SAME
1756
  weight_format: SAME
1757
  transformer.h.30.ln_2:
1758
+ approximation_function: NONE
1759
  bias_format: SAME
1760
  input_format: SAME
1761
  instance: LayerNorm
1762
  output_format: SAME
1763
  weight_format: SAME
1764
  transformer.h.30.mlp.act:
1765
+ approximation_function: NONE
1766
  input_format: SAME
1767
  instance: GELU
1768
  output_format: SAME
 
1814
  instance: Dropout
1815
  output_format: SAME
1816
  transformer.h.31.attn.softmax:
1817
+ approximation_function: NONE
1818
  input_format: SAME
1819
  instance: Softmax
1820
  output_format: SAME
1821
  transformer.h.31.ln_1:
1822
+ approximation_function: NONE
1823
  bias_format: SAME
1824
  input_format: SAME
1825
  instance: LayerNorm
1826
  output_format: SAME
1827
  weight_format: SAME
1828
  transformer.h.31.ln_2:
1829
+ approximation_function: NONE
1830
  bias_format: SAME
1831
  input_format: SAME
1832
  instance: LayerNorm
1833
  output_format: SAME
1834
  weight_format: SAME
1835
  transformer.h.31.mlp.act:
1836
+ approximation_function: NONE
1837
  input_format: SAME
1838
  instance: GELU
1839
  output_format: SAME
 
1885
  instance: Dropout
1886
  output_format: SAME
1887
  transformer.h.32.attn.softmax:
1888
+ approximation_function: NONE
1889
  input_format: SAME
1890
  instance: Softmax
1891
  output_format: SAME
1892
  transformer.h.32.ln_1:
1893
+ approximation_function: NONE
1894
  bias_format: SAME
1895
  input_format: SAME
1896
  instance: LayerNorm
1897
  output_format: SAME
1898
  weight_format: SAME
1899
  transformer.h.32.ln_2:
1900
+ approximation_function: NONE
1901
  bias_format: SAME
1902
  input_format: SAME
1903
  instance: LayerNorm
1904
  output_format: SAME
1905
  weight_format: SAME
1906
  transformer.h.32.mlp.act:
1907
+ approximation_function: NONE
1908
  input_format: SAME
1909
  instance: GELU
1910
  output_format: SAME
 
1956
  instance: Dropout
1957
  output_format: SAME
1958
  transformer.h.33.attn.softmax:
1959
+ approximation_function: NONE
1960
  input_format: SAME
1961
  instance: Softmax
1962
  output_format: SAME
1963
  transformer.h.33.ln_1:
1964
+ approximation_function: NONE
1965
  bias_format: SAME
1966
  input_format: SAME
1967
  instance: LayerNorm
1968
  output_format: SAME
1969
  weight_format: SAME
1970
  transformer.h.33.ln_2:
1971
+ approximation_function: NONE
1972
  bias_format: SAME
1973
  input_format: SAME
1974
  instance: LayerNorm
1975
  output_format: SAME
1976
  weight_format: SAME
1977
  transformer.h.33.mlp.act:
1978
+ approximation_function: NONE
1979
  input_format: SAME
1980
  instance: GELU
1981
  output_format: SAME
 
2027
  instance: Dropout
2028
  output_format: SAME
2029
  transformer.h.34.attn.softmax:
2030
+ approximation_function: NONE
2031
  input_format: SAME
2032
  instance: Softmax
2033
  output_format: SAME
2034
  transformer.h.34.ln_1:
2035
+ approximation_function: NONE
2036
  bias_format: SAME
2037
  input_format: SAME
2038
  instance: LayerNorm
2039
  output_format: SAME
2040
  weight_format: SAME
2041
  transformer.h.34.ln_2:
2042
+ approximation_function: NONE
2043
  bias_format: SAME
2044
  input_format: SAME
2045
  instance: LayerNorm
2046
  output_format: SAME
2047
  weight_format: SAME
2048
  transformer.h.34.mlp.act:
2049
+ approximation_function: NONE
2050
  input_format: SAME
2051
  instance: GELU
2052
  output_format: SAME
 
2098
  instance: Dropout
2099
  output_format: SAME
2100
  transformer.h.35.attn.softmax:
2101
+ approximation_function: NONE
2102
  input_format: SAME
2103
  instance: Softmax
2104
  output_format: SAME
2105
  transformer.h.35.ln_1:
2106
+ approximation_function: NONE
2107
  bias_format: SAME
2108
  input_format: SAME
2109
  instance: LayerNorm
2110
  output_format: SAME
2111
  weight_format: SAME
2112
  transformer.h.35.ln_2:
2113
+ approximation_function: NONE
2114
  bias_format: SAME
2115
  input_format: SAME
2116
  instance: LayerNorm
2117
  output_format: SAME
2118
  weight_format: SAME
2119
  transformer.h.35.mlp.act:
2120
+ approximation_function: NONE
2121
  input_format: SAME
2122
  instance: GELU
2123
  output_format: SAME
 
2169
  instance: Dropout
2170
  output_format: SAME
2171
  transformer.h.36.attn.softmax:
2172
+ approximation_function: NONE
2173
  input_format: SAME
2174
  instance: Softmax
2175
  output_format: SAME
2176
  transformer.h.36.ln_1:
2177
+ approximation_function: NONE
2178
  bias_format: SAME
2179
  input_format: SAME
2180
  instance: LayerNorm
2181
  output_format: SAME
2182
  weight_format: SAME
2183
  transformer.h.36.ln_2:
2184
+ approximation_function: NONE
2185
  bias_format: SAME
2186
  input_format: SAME
2187
  instance: LayerNorm
2188
  output_format: SAME
2189
  weight_format: SAME
2190
  transformer.h.36.mlp.act:
2191
+ approximation_function: NONE
2192
  input_format: SAME
2193
  instance: GELU
2194
  output_format: SAME
 
2240
  instance: Dropout
2241
  output_format: SAME
2242
  transformer.h.37.attn.softmax:
2243
+ approximation_function: NONE
2244
  input_format: SAME
2245
  instance: Softmax
2246
  output_format: SAME
2247
  transformer.h.37.ln_1:
2248
+ approximation_function: NONE
2249
  bias_format: SAME
2250
  input_format: SAME
2251
  instance: LayerNorm
2252
  output_format: SAME
2253
  weight_format: SAME
2254
  transformer.h.37.ln_2:
2255
+ approximation_function: NONE
2256
  bias_format: SAME
2257
  input_format: SAME
2258
  instance: LayerNorm
2259
  output_format: SAME
2260
  weight_format: SAME
2261
  transformer.h.37.mlp.act:
2262
+ approximation_function: NONE
2263
  input_format: SAME
2264
  instance: GELU
2265
  output_format: SAME
 
2311
  instance: Dropout
2312
  output_format: SAME
2313
  transformer.h.38.attn.softmax:
2314
+ approximation_function: NONE
2315
  input_format: SAME
2316
  instance: Softmax
2317
  output_format: SAME
2318
  transformer.h.38.ln_1:
2319
+ approximation_function: NONE
2320
  bias_format: SAME
2321
  input_format: SAME
2322
  instance: LayerNorm
2323
  output_format: SAME
2324
  weight_format: SAME
2325
  transformer.h.38.ln_2:
2326
+ approximation_function: NONE
2327
  bias_format: SAME
2328
  input_format: SAME
2329
  instance: LayerNorm
2330
  output_format: SAME
2331
  weight_format: SAME
2332
  transformer.h.38.mlp.act:
2333
+ approximation_function: NONE
2334
  input_format: SAME
2335
  instance: GELU
2336
  output_format: SAME
 
2382
  instance: Dropout
2383
  output_format: SAME
2384
  transformer.h.39.attn.softmax:
2385
+ approximation_function: NONE
2386
  input_format: SAME
2387
  instance: Softmax
2388
  output_format: SAME
2389
  transformer.h.39.ln_1:
2390
+ approximation_function: NONE
2391
  bias_format: SAME
2392
  input_format: SAME
2393
  instance: LayerNorm
2394
  output_format: SAME
2395
  weight_format: SAME
2396
  transformer.h.39.ln_2:
2397
+ approximation_function: NONE
2398
  bias_format: SAME
2399
  input_format: SAME
2400
  instance: LayerNorm
2401
  output_format: SAME
2402
  weight_format: SAME
2403
  transformer.h.39.mlp.act:
2404
+ approximation_function: NONE
2405
  input_format: SAME
2406
  instance: GELU
2407
  output_format: SAME
 
2453
  instance: Dropout
2454
  output_format: SAME
2455
  transformer.h.4.attn.softmax:
2456
+ approximation_function: NONE
2457
  input_format: SAME
2458
  instance: Softmax
2459
  output_format: SAME
2460
  transformer.h.4.ln_1:
2461
+ approximation_function: NONE
2462
  bias_format: SAME
2463
  input_format: SAME
2464
  instance: LayerNorm
2465
  output_format: SAME
2466
  weight_format: SAME
2467
  transformer.h.4.ln_2:
2468
+ approximation_function: NONE
2469
  bias_format: SAME
2470
  input_format: SAME
2471
  instance: LayerNorm
2472
  output_format: SAME
2473
  weight_format: SAME
2474
  transformer.h.4.mlp.act:
2475
+ approximation_function: NONE
2476
  input_format: SAME
2477
  instance: GELU
2478
  output_format: SAME
 
2524
  instance: Dropout
2525
  output_format: SAME
2526
  transformer.h.40.attn.softmax:
2527
+ approximation_function: NONE
2528
  input_format: SAME
2529
  instance: Softmax
2530
  output_format: SAME
2531
  transformer.h.40.ln_1:
2532
+ approximation_function: NONE
2533
  bias_format: SAME
2534
  input_format: SAME
2535
  instance: LayerNorm
2536
  output_format: SAME
2537
  weight_format: SAME
2538
  transformer.h.40.ln_2:
2539
+ approximation_function: NONE
2540
  bias_format: SAME
2541
  input_format: SAME
2542
  instance: LayerNorm
2543
  output_format: SAME
2544
  weight_format: SAME
2545
  transformer.h.40.mlp.act:
2546
+ approximation_function: NONE
2547
  input_format: SAME
2548
  instance: GELU
2549
  output_format: SAME
 
2595
  instance: Dropout
2596
  output_format: SAME
2597
  transformer.h.41.attn.softmax:
2598
+ approximation_function: NONE
2599
  input_format: SAME
2600
  instance: Softmax
2601
  output_format: SAME
2602
  transformer.h.41.ln_1:
2603
+ approximation_function: NONE
2604
  bias_format: SAME
2605
  input_format: SAME
2606
  instance: LayerNorm
2607
  output_format: SAME
2608
  weight_format: SAME
2609
  transformer.h.41.ln_2:
2610
+ approximation_function: NONE
2611
  bias_format: SAME
2612
  input_format: SAME
2613
  instance: LayerNorm
2614
  output_format: SAME
2615
  weight_format: SAME
2616
  transformer.h.41.mlp.act:
2617
+ approximation_function: NONE
2618
  input_format: SAME
2619
  instance: GELU
2620
  output_format: SAME
 
2666
  instance: Dropout
2667
  output_format: SAME
2668
  transformer.h.42.attn.softmax:
2669
+ approximation_function: NONE
2670
  input_format: SAME
2671
  instance: Softmax
2672
  output_format: SAME
2673
  transformer.h.42.ln_1:
2674
+ approximation_function: NONE
2675
  bias_format: SAME
2676
  input_format: SAME
2677
  instance: LayerNorm
2678
  output_format: SAME
2679
  weight_format: SAME
2680
  transformer.h.42.ln_2:
2681
+ approximation_function: NONE
2682
  bias_format: SAME
2683
  input_format: SAME
2684
  instance: LayerNorm
2685
  output_format: SAME
2686
  weight_format: SAME
2687
  transformer.h.42.mlp.act:
2688
+ approximation_function: NONE
2689
  input_format: SAME
2690
  instance: GELU
2691
  output_format: SAME
 
2737
  instance: Dropout
2738
  output_format: SAME
2739
  transformer.h.43.attn.softmax:
2740
+ approximation_function: NONE
2741
  input_format: SAME
2742
  instance: Softmax
2743
  output_format: SAME
2744
  transformer.h.43.ln_1:
2745
+ approximation_function: NONE
2746
  bias_format: SAME
2747
  input_format: SAME
2748
  instance: LayerNorm
2749
  output_format: SAME
2750
  weight_format: SAME
2751
  transformer.h.43.ln_2:
2752
+ approximation_function: NONE
2753
  bias_format: SAME
2754
  input_format: SAME
2755
  instance: LayerNorm
2756
  output_format: SAME
2757
  weight_format: SAME
2758
  transformer.h.43.mlp.act:
2759
+ approximation_function: NONE
2760
  input_format: SAME
2761
  instance: GELU
2762
  output_format: SAME
 
2808
  instance: Dropout
2809
  output_format: SAME
2810
  transformer.h.44.attn.softmax:
2811
+ approximation_function: NONE
2812
  input_format: SAME
2813
  instance: Softmax
2814
  output_format: SAME
2815
  transformer.h.44.ln_1:
2816
+ approximation_function: NONE
2817
  bias_format: SAME
2818
  input_format: SAME
2819
  instance: LayerNorm
2820
  output_format: SAME
2821
  weight_format: SAME
2822
  transformer.h.44.ln_2:
2823
+ approximation_function: NONE
2824
  bias_format: SAME
2825
  input_format: SAME
2826
  instance: LayerNorm
2827
  output_format: SAME
2828
  weight_format: SAME
2829
  transformer.h.44.mlp.act:
2830
+ approximation_function: NONE
2831
  input_format: SAME
2832
  instance: GELU
2833
  output_format: SAME
 
2879
  instance: Dropout
2880
  output_format: SAME
2881
  transformer.h.45.attn.softmax:
2882
+ approximation_function: NONE
2883
  input_format: SAME
2884
  instance: Softmax
2885
  output_format: SAME
2886
  transformer.h.45.ln_1:
2887
+ approximation_function: NONE
2888
  bias_format: SAME
2889
  input_format: SAME
2890
  instance: LayerNorm
2891
  output_format: SAME
2892
  weight_format: SAME
2893
  transformer.h.45.ln_2:
2894
+ approximation_function: NONE
2895
  bias_format: SAME
2896
  input_format: SAME
2897
  instance: LayerNorm
2898
  output_format: SAME
2899
  weight_format: SAME
2900
  transformer.h.45.mlp.act:
2901
+ approximation_function: NONE
2902
  input_format: SAME
2903
  instance: GELU
2904
  output_format: SAME
 
2950
  instance: Dropout
2951
  output_format: SAME
2952
  transformer.h.46.attn.softmax:
2953
+ approximation_function: NONE
2954
  input_format: SAME
2955
  instance: Softmax
2956
  output_format: SAME
2957
  transformer.h.46.ln_1:
2958
+ approximation_function: NONE
2959
  bias_format: SAME
2960
  input_format: SAME
2961
  instance: LayerNorm
2962
  output_format: SAME
2963
  weight_format: SAME
2964
  transformer.h.46.ln_2:
2965
+ approximation_function: NONE
2966
  bias_format: SAME
2967
  input_format: SAME
2968
  instance: LayerNorm
2969
  output_format: SAME
2970
  weight_format: SAME
2971
  transformer.h.46.mlp.act:
2972
+ approximation_function: NONE
2973
  input_format: SAME
2974
  instance: GELU
2975
  output_format: SAME
 
3021
  instance: Dropout
3022
  output_format: SAME
3023
  transformer.h.47.attn.softmax:
3024
+ approximation_function: NONE
3025
  input_format: SAME
3026
  instance: Softmax
3027
  output_format: SAME
3028
  transformer.h.47.ln_1:
3029
+ approximation_function: NONE
3030
  bias_format: SAME
3031
  input_format: SAME
3032
  instance: LayerNorm
3033
  output_format: SAME
3034
  weight_format: SAME
3035
  transformer.h.47.ln_2:
3036
+ approximation_function: NONE
3037
  bias_format: SAME
3038
  input_format: SAME
3039
  instance: LayerNorm
3040
  output_format: SAME
3041
  weight_format: SAME
3042
  transformer.h.47.mlp.act:
3043
+ approximation_function: NONE
3044
  input_format: SAME
3045
  instance: GELU
3046
  output_format: SAME
 
3092
  instance: Dropout
3093
  output_format: SAME
3094
  transformer.h.5.attn.softmax:
3095
+ approximation_function: NONE
3096
  input_format: SAME
3097
  instance: Softmax
3098
  output_format: SAME
3099
  transformer.h.5.ln_1:
3100
+ approximation_function: NONE
3101
  bias_format: SAME
3102
  input_format: SAME
3103
  instance: LayerNorm
3104
  output_format: SAME
3105
  weight_format: SAME
3106
  transformer.h.5.ln_2:
3107
+ approximation_function: NONE
3108
  bias_format: SAME
3109
  input_format: SAME
3110
  instance: LayerNorm
3111
  output_format: SAME
3112
  weight_format: SAME
3113
  transformer.h.5.mlp.act:
3114
+ approximation_function: NONE
3115
  input_format: SAME
3116
  instance: GELU
3117
  output_format: SAME
 
3163
  instance: Dropout
3164
  output_format: SAME
3165
  transformer.h.6.attn.softmax:
3166
+ approximation_function: NONE
3167
  input_format: SAME
3168
  instance: Softmax
3169
  output_format: SAME
3170
  transformer.h.6.ln_1:
3171
+ approximation_function: NONE
3172
  bias_format: SAME
3173
  input_format: SAME
3174
  instance: LayerNorm
3175
  output_format: SAME
3176
  weight_format: SAME
3177
  transformer.h.6.ln_2:
3178
+ approximation_function: NONE
3179
  bias_format: SAME
3180
  input_format: SAME
3181
  instance: LayerNorm
3182
  output_format: SAME
3183
  weight_format: SAME
3184
  transformer.h.6.mlp.act:
3185
+ approximation_function: NONE
3186
  input_format: SAME
3187
  instance: GELU
3188
  output_format: SAME
 
3234
  instance: Dropout
3235
  output_format: SAME
3236
  transformer.h.7.attn.softmax:
3237
+ approximation_function: NONE
3238
  input_format: SAME
3239
  instance: Softmax
3240
  output_format: SAME
3241
  transformer.h.7.ln_1:
3242
+ approximation_function: NONE
3243
  bias_format: SAME
3244
  input_format: SAME
3245
  instance: LayerNorm
3246
  output_format: SAME
3247
  weight_format: SAME
3248
  transformer.h.7.ln_2:
3249
+ approximation_function: NONE
3250
  bias_format: SAME
3251
  input_format: SAME
3252
  instance: LayerNorm
3253
  output_format: SAME
3254
  weight_format: SAME
3255
  transformer.h.7.mlp.act:
3256
+ approximation_function: NONE
3257
  input_format: SAME
3258
  instance: GELU
3259
  output_format: SAME
 
3305
  instance: Dropout
3306
  output_format: SAME
3307
  transformer.h.8.attn.softmax:
3308
+ approximation_function: NONE
3309
  input_format: SAME
3310
  instance: Softmax
3311
  output_format: SAME
3312
  transformer.h.8.ln_1:
3313
+ approximation_function: NONE
3314
  bias_format: SAME
3315
  input_format: SAME
3316
  instance: LayerNorm
3317
  output_format: SAME
3318
  weight_format: SAME
3319
  transformer.h.8.ln_2:
3320
+ approximation_function: NONE
3321
  bias_format: SAME
3322
  input_format: SAME
3323
  instance: LayerNorm
3324
  output_format: SAME
3325
  weight_format: SAME
3326
  transformer.h.8.mlp.act:
3327
+ approximation_function: NONE
3328
  input_format: SAME
3329
  instance: GELU
3330
  output_format: SAME
 
3376
  instance: Dropout
3377
  output_format: SAME
3378
  transformer.h.9.attn.softmax:
3379
+ approximation_function: NONE
3380
  input_format: SAME
3381
  instance: Softmax
3382
  output_format: SAME
3383
  transformer.h.9.ln_1:
3384
+ approximation_function: NONE
3385
  bias_format: SAME
3386
  input_format: SAME
3387
  instance: LayerNorm
3388
  output_format: SAME
3389
  weight_format: SAME
3390
  transformer.h.9.ln_2:
3391
+ approximation_function: NONE
3392
  bias_format: SAME
3393
  input_format: SAME
3394
  instance: LayerNorm
3395
  output_format: SAME
3396
  weight_format: SAME
3397
  transformer.h.9.mlp.act:
3398
+ approximation_function: NONE
3399
  input_format: SAME
3400
  instance: GELU
3401
  output_format: SAME
 
3421
  instance: Dropout
3422
  output_format: SAME
3423
  transformer.ln_f:
3424
+ approximation_function: NONE
3425
  bias_format: SAME
3426
  input_format: SAME
3427
  instance: LayerNorm