File size: 273,641 Bytes
f056d10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
program(1.3)
[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3404.16.1"}, {"coremlc-version", "3404.23.1"}})]
{
    func infer<ios18>(tensor<fp16, [1, 1, 1, 1024]> causal_mask, tensor<int32, [1]> current_pos, tensor<fp16, [1, 1, 4096]> hidden_states, state<tensor<fp16, [64, 8, 1024, 128]>> model_model_kv_cache_0, tensor<int32, [1]> position_ids) {
            tensor<fp16, [4096, 4096, 1, 1]> model_model_layers_12_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4096, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor<fp16, [512, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12583040))))[name = string("model_model_layers_12_self_attn_q_proj_weight_palettized")];
            tensor<fp16, [1024, 4096, 1, 1]> model_model_layers_12_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [1024, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12648640))), lut = tensor<fp16, [128, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15794432))))[name = string("model_model_layers_12_self_attn_k_proj_weight_palettized")];
            tensor<fp16, [1024, 4096, 1, 1]> model_model_layers_12_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [1024, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15810880))), lut = tensor<fp16, [128, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18956672))))[name = string("model_model_layers_12_self_attn_v_proj_weight_palettized")];
            tensor<fp16, [14336, 4096, 1, 1]> model_model_layers_12_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [14336, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18973120))), lut = tensor<fp16, [1792, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63013376))))[name = string("model_model_layers_12_mlp_gate_proj_weight_palettized")];
            tensor<fp16, [14336, 4096, 1, 1]> model_model_layers_12_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [14336, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63242816))), lut = tensor<fp16, [1792, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107283072))))[name = string("model_model_layers_12_mlp_up_proj_weight_palettized")];
            tensor<fp16, [4096, 14336, 1, 1]> model_model_layers_12_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4096, 14336, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107512512))), lut = tensor<fp16, [512, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151552768))))[name = string("model_model_layers_12_mlp_down_proj_weight_palettized")];
            tensor<fp16, [4096, 4096, 1, 1]> model_model_layers_13_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4096, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151618368))), lut = tensor<fp16, [512, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164201344))))[name = string("model_model_layers_13_self_attn_q_proj_weight_palettized")];
            tensor<fp16, [1024, 4096, 1, 1]> model_model_layers_13_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [1024, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164266944))), lut = tensor<fp16, [128, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167412736))))[name = string("model_model_layers_13_self_attn_k_proj_weight_palettized")];
            tensor<fp16, [1024, 4096, 1, 1]> model_model_layers_13_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [1024, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167429184))), lut = tensor<fp16, [128, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170574976))))[name = string("model_model_layers_13_self_attn_v_proj_weight_palettized")];
            tensor<fp16, [14336, 4096, 1, 1]> model_model_layers_13_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [14336, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170591424))), lut = tensor<fp16, [1792, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214631680))))[name = string("model_model_layers_13_mlp_gate_proj_weight_palettized")];
            tensor<fp16, [14336, 4096, 1, 1]> model_model_layers_13_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [14336, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214861120))), lut = tensor<fp16, [1792, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(258901376))))[name = string("model_model_layers_13_mlp_up_proj_weight_palettized")];
            tensor<fp16, [4096, 14336, 1, 1]> model_model_layers_13_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4096, 14336, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259130816))), lut = tensor<fp16, [512, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303171072))))[name = string("model_model_layers_13_mlp_down_proj_weight_palettized")];
            tensor<fp16, [4096, 4096, 1, 1]> model_model_layers_14_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4096, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303236672))), lut = tensor<fp16, [512, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315819648))))[name = string("model_model_layers_14_self_attn_q_proj_weight_palettized")];
            tensor<fp16, [1024, 4096, 1, 1]> model_model_layers_14_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [1024, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315885248))), lut = tensor<fp16, [128, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319031040))))[name = string("model_model_layers_14_self_attn_k_proj_weight_palettized")];
            tensor<fp16, [1024, 4096, 1, 1]> model_model_layers_14_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [1024, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319047488))), lut = tensor<fp16, [128, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322193280))))[name = string("model_model_layers_14_self_attn_v_proj_weight_palettized")];
            tensor<fp16, [14336, 4096, 1, 1]> model_model_layers_14_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [14336, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322209728))), lut = tensor<fp16, [1792, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366249984))))[name = string("model_model_layers_14_mlp_gate_proj_weight_palettized")];
            tensor<fp16, [14336, 4096, 1, 1]> model_model_layers_14_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [14336, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366479424))), lut = tensor<fp16, [1792, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410519680))))[name = string("model_model_layers_14_mlp_up_proj_weight_palettized")];
            tensor<fp16, [4096, 14336, 1, 1]> model_model_layers_14_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4096, 14336, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410749120))), lut = tensor<fp16, [512, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454789376))))[name = string("model_model_layers_14_mlp_down_proj_weight_palettized")];
            tensor<fp16, [4096, 4096, 1, 1]> model_model_layers_15_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4096, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454854976))), lut = tensor<fp16, [512, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(467437952))))[name = string("model_model_layers_15_self_attn_q_proj_weight_palettized")];
            tensor<fp16, [1024, 4096, 1, 1]> model_model_layers_15_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [1024, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(467503552))), lut = tensor<fp16, [128, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470649344))))[name = string("model_model_layers_15_self_attn_k_proj_weight_palettized")];
            tensor<fp16, [1024, 4096, 1, 1]> model_model_layers_15_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [1024, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470665792))), lut = tensor<fp16, [128, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473811584))))[name = string("model_model_layers_15_self_attn_v_proj_weight_palettized")];
            tensor<fp16, [14336, 4096, 1, 1]> model_model_layers_15_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [14336, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473828032))), lut = tensor<fp16, [1792, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(517868288))))[name = string("model_model_layers_15_mlp_gate_proj_weight_palettized")];
            tensor<fp16, [14336, 4096, 1, 1]> model_model_layers_15_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [14336, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518097728))), lut = tensor<fp16, [1792, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562137984))))[name = string("model_model_layers_15_mlp_up_proj_weight_palettized")];
            tensor<fp16, [4096, 14336, 1, 1]> model_model_layers_15_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4096, 14336, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562367424))), lut = tensor<fp16, [512, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(606407680))))[name = string("model_model_layers_15_mlp_down_proj_weight_palettized")];
            int32 var_41 = const()[name = string("op_41"), val = int32(-1)];
            int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)];
            tensor<bool, [1]> greater_equal_0 = greater_equal(x = current_pos, y = greater_equal_0_y_0)[name = string("greater_equal_0")];
            int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(131072)];
            tensor<int32, [1]> add_0 = add(x = current_pos, y = slice_by_index_0)[name = string("add_0")];
            tensor<int32, [1]> select_0 = select(a = current_pos, b = add_0, cond = greater_equal_0)[name = string("select_0")];
            int32 var_150_axis_0 = const()[name = string("op_150_axis_0"), val = int32(1)];
            int32 var_150_batch_dims_0 = const()[name = string("op_150_batch_dims_0"), val = int32(0)];
            bool var_150_validate_indices_0 = const()[name = string("op_150_validate_indices_0"), val = bool(false)];
            tensor<fp16, [1, 131072, 128]> var_46_to_fp16 = const()[name = string("op_46_to_fp16"), val = tensor<fp16, [1, 131072, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(606473280)))];
            tensor<fp16, [1, 1, 128]> var_150_cast_fp16 = gather(axis = var_150_axis_0, batch_dims = var_150_batch_dims_0, indices = select_0, validate_indices = var_150_validate_indices_0, x = var_46_to_fp16)[name = string("op_150_cast_fp16")];
            tensor<int32, [4]> var_151 = const()[name = string("op_151"), val = tensor<int32, [4]>([1, 1, 1, -1])];
            tensor<fp16, [1, 1, 1, 128]> sin_1_cast_fp16 = reshape(shape = var_151, x = var_150_cast_fp16)[name = string("sin_1_cast_fp16")];
            int32 var_155_axis_0 = const()[name = string("op_155_axis_0"), val = int32(1)];
            int32 var_155_batch_dims_0 = const()[name = string("op_155_batch_dims_0"), val = int32(0)];
            bool var_155_validate_indices_0 = const()[name = string("op_155_validate_indices_0"), val = bool(false)];
            tensor<fp16, [1, 131072, 128]> var_40_to_fp16 = const()[name = string("op_40_to_fp16"), val = tensor<fp16, [1, 131072, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640027776)))];
            tensor<fp16, [1, 1, 128]> var_155_cast_fp16 = gather(axis = var_155_axis_0, batch_dims = var_155_batch_dims_0, indices = select_0, validate_indices = var_155_validate_indices_0, x = var_40_to_fp16)[name = string("op_155_cast_fp16")];
            tensor<int32, [4]> var_156 = const()[name = string("op_156"), val = tensor<int32, [4]>([1, 1, 1, -1])];
            tensor<fp16, [1, 1, 1, 128]> cos_1_cast_fp16 = reshape(shape = var_156, x = var_155_cast_fp16)[name = string("cos_1_cast_fp16")];
            tensor<int32, [1]> mean_1_axes_0 = const()[name = string("mean_1_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_1_keep_dims_0 = const()[name = string("mean_1_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 1, 1]> mean_1_cast_fp16 = reduce_mean(axes = mean_1_axes_0, keep_dims = mean_1_keep_dims_0, x = hidden_states)[name = string("mean_1_cast_fp16")];
            tensor<fp16, [1, 1, 4096]> input_1_cast_fp16 = sub(x = hidden_states, y = mean_1_cast_fp16)[name = string("input_1_cast_fp16")];
            tensor<int32, [1]> var_164_axes_0 = const()[name = string("op_164_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [4096]> model_model_layers_12_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_input_layernorm_weight_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(673582272)))];
            fp16 var_36_to_fp16 = const()[name = string("op_36_to_fp16"), val = fp16(0x1.5p-17)];
            tensor<fp16, [1, 1, 4096]> var_164_cast_fp16 = layer_norm(axes = var_164_axes_0, epsilon = var_36_to_fp16, gamma = model_model_layers_12_input_layernorm_weight_to_fp16, x = input_1_cast_fp16)[name = string("op_164_cast_fp16")];
            tensor<int32, [3]> var_167 = const()[name = string("op_167"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> var_169_axes_0 = const()[name = string("op_169_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 4096, 1]> var_168 = transpose(perm = var_167, x = var_164_cast_fp16)[name = string("transpose_15")];
            tensor<fp16, [1, 4096, 1, 1]> var_169 = expand_dims(axes = var_169_axes_0, x = var_168)[name = string("op_169")];
            string var_176_pad_type_0 = const()[name = string("op_176_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> var_176_strides_0 = const()[name = string("op_176_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> var_176_pad_0 = const()[name = string("op_176_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> var_176_dilations_0 = const()[name = string("op_176_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 var_176_groups_0 = const()[name = string("op_176_groups_0"), val = int32(1)];
            tensor<fp16, [1, 4096, 1, 1]> var_176 = conv(dilations = var_176_dilations_0, groups = var_176_groups_0, pad = var_176_pad_0, pad_type = var_176_pad_type_0, strides = var_176_strides_0, weight = model_model_layers_12_self_attn_q_proj_weight_palettized, x = var_169)[name = string("op_176")];
            tensor<int32, [4]> var_177 = const()[name = string("op_177"), val = tensor<int32, [4]>([1, 32, 1, 128])];
            tensor<fp16, [1, 32, 1, 128]> var_178 = reshape(shape = var_177, x = var_176)[name = string("op_178")];
            string var_185_pad_type_0 = const()[name = string("op_185_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> var_185_strides_0 = const()[name = string("op_185_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> var_185_pad_0 = const()[name = string("op_185_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> var_185_dilations_0 = const()[name = string("op_185_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 var_185_groups_0 = const()[name = string("op_185_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 1]> var_185 = conv(dilations = var_185_dilations_0, groups = var_185_groups_0, pad = var_185_pad_0, pad_type = var_185_pad_type_0, strides = var_185_strides_0, weight = model_model_layers_12_self_attn_k_proj_weight_palettized, x = var_169)[name = string("op_185")];
            tensor<int32, [4]> var_186 = const()[name = string("op_186"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<fp16, [1, 8, 1, 128]> var_187 = reshape(shape = var_186, x = var_185)[name = string("op_187")];
            string var_194_pad_type_0 = const()[name = string("op_194_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> var_194_strides_0 = const()[name = string("op_194_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> var_194_pad_0 = const()[name = string("op_194_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> var_194_dilations_0 = const()[name = string("op_194_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 var_194_groups_0 = const()[name = string("op_194_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 1]> var_194 = conv(dilations = var_194_dilations_0, groups = var_194_groups_0, pad = var_194_pad_0, pad_type = var_194_pad_type_0, strides = var_194_strides_0, weight = model_model_layers_12_self_attn_v_proj_weight_palettized, x = var_169)[name = string("op_194")];
            tensor<int32, [4]> var_195 = const()[name = string("op_195"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<fp16, [1, 8, 1, 128]> var_196 = reshape(shape = var_195, x = var_194)[name = string("op_196")];
            tensor<int32, [4]> x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor<int32, [4]>([1, 32, 1, 64])];
            tensor<bool, [4]> x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 32, 1, 64]> x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = var_178)[name = string("x1_1")];
            tensor<int32, [4]> x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor<int32, [4]>([1, 32, 1, 128])];
            tensor<bool, [4]> x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 32, 1, 64]> x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = var_178)[name = string("x2_1")];
            tensor<int32, [4]> cos_3_begin_0 = const()[name = string("cos_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> cos_3_end_0 = const()[name = string("cos_3_end_0"), val = tensor<int32, [4]>([1, 1, 1, 64])];
            tensor<bool, [4]> cos_3_end_mask_0 = const()[name = string("cos_3_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 1, 1, 64]> cos_3_cast_fp16 = slice_by_index(begin = cos_3_begin_0, end = cos_3_end_0, end_mask = cos_3_end_mask_0, x = cos_1_cast_fp16)[name = string("cos_3_cast_fp16")];
            tensor<int32, [4]> sin_3_begin_0 = const()[name = string("sin_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> sin_3_end_0 = const()[name = string("sin_3_end_0"), val = tensor<int32, [4]>([1, 1, 1, 64])];
            tensor<bool, [4]> sin_3_end_mask_0 = const()[name = string("sin_3_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 1, 1, 64]> sin_3_cast_fp16 = slice_by_index(begin = sin_3_begin_0, end = sin_3_end_0, end_mask = sin_3_end_mask_0, x = sin_1_cast_fp16)[name = string("sin_3_cast_fp16")];
            tensor<fp16, [1, 32, 1, 64]> var_210_cast_fp16 = mul(x = x1_1, y = cos_3_cast_fp16)[name = string("op_210_cast_fp16")];
            tensor<fp16, [1, 32, 1, 64]> var_211_cast_fp16 = mul(x = x2_1, y = sin_3_cast_fp16)[name = string("op_211_cast_fp16")];
            tensor<fp16, [1, 32, 1, 64]> var_212_cast_fp16 = sub(x = var_210_cast_fp16, y = var_211_cast_fp16)[name = string("op_212_cast_fp16")];
            tensor<fp16, [1, 32, 1, 64]> var_213_cast_fp16 = mul(x = x2_1, y = cos_3_cast_fp16)[name = string("op_213_cast_fp16")];
            tensor<fp16, [1, 32, 1, 64]> var_214_cast_fp16 = mul(x = x1_1, y = sin_3_cast_fp16)[name = string("op_214_cast_fp16")];
            tensor<fp16, [1, 32, 1, 64]> var_215_cast_fp16 = add(x = var_213_cast_fp16, y = var_214_cast_fp16)[name = string("op_215_cast_fp16")];
            bool rotated_1_interleave_0 = const()[name = string("rotated_1_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 32, 1, 128]> rotated_1_cast_fp16 = concat(axis = var_41, interleave = rotated_1_interleave_0, values = (var_212_cast_fp16, var_215_cast_fp16))[name = string("rotated_1_cast_fp16")];
            tensor<int32, [4]> x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor<int32, [4]>([1, 8, 1, 64])];
            tensor<bool, [4]> x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 8, 1, 64]> x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = var_187)[name = string("x1_3")];
            tensor<int32, [4]> x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<bool, [4]> x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 8, 1, 64]> x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = var_187)[name = string("x2_3")];
            tensor<fp16, [1, 8, 1, 64]> var_231_cast_fp16 = mul(x = x1_3, y = cos_3_cast_fp16)[name = string("op_231_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_232_cast_fp16 = mul(x = x2_3, y = sin_3_cast_fp16)[name = string("op_232_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_233_cast_fp16 = sub(x = var_231_cast_fp16, y = var_232_cast_fp16)[name = string("op_233_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_234_cast_fp16 = mul(x = x2_3, y = cos_3_cast_fp16)[name = string("op_234_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_235_cast_fp16 = mul(x = x1_3, y = sin_3_cast_fp16)[name = string("op_235_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_236_cast_fp16 = add(x = var_234_cast_fp16, y = var_235_cast_fp16)[name = string("op_236_cast_fp16")];
            bool rotated_3_interleave_0 = const()[name = string("rotated_3_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 8, 1, 128]> rotated_3_cast_fp16 = concat(axis = var_41, interleave = rotated_3_interleave_0, values = (var_233_cast_fp16, var_236_cast_fp16))[name = string("rotated_3_cast_fp16")];
            int32 var_240 = const()[name = string("op_240"), val = int32(1)];
            tensor<int32, [1]> var_241 = add(x = current_pos, y = var_240)[name = string("op_241")];
            tensor<fp16, [64, 8, 1024, 128]> read_state_0 = read_state(input = model_model_kv_cache_0)[name = string("read_state_0")];
            tensor<int32, [1]> expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor<int32, [1]>([12])];
            tensor<int32, [1]> expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor<int32, [1]>([13])];
            int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)];
            bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")];
            tensor<int32, [1]> concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)];
            bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_241, concat_3_values3_0))[name = string("concat_3")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [64, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_1_stride_0, update = rotated_3_cast_fp16, x = read_state_0)[name = string("model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_0_write_state")];
            tensor<fp16, [64, 8, 1024, 128]> coreml_update_state_8 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_0")];
            tensor<int32, [1]> expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor<int32, [1]>([44])];
            tensor<int32, [1]> expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor<int32, [1]>([45])];
            int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)];
            bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")];
            tensor<int32, [1]> concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)];
            bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_241, concat_7_values3_0))[name = string("concat_7")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [64, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_2_stride_0, update = var_196, x = coreml_update_state_8)[name = string("model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_1_write_state")];
            tensor<fp16, [64, 8, 1024, 128]> coreml_update_state_9 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_1")];
            tensor<int32, [4]> var_256_begin_0 = const()[name = string("op_256_begin_0"), val = tensor<int32, [4]>([12, 0, 0, 0])];
            tensor<int32, [4]> var_256_end_0 = const()[name = string("op_256_end_0"), val = tensor<int32, [4]>([13, 8, 1024, 128])];
            tensor<bool, [4]> var_256_end_mask_0 = const()[name = string("op_256_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_256_cast_fp16 = slice_by_index(begin = var_256_begin_0, end = var_256_end_0, end_mask = var_256_end_mask_0, x = coreml_update_state_9)[name = string("op_256_cast_fp16")];
            tensor<int32, [1]> K_layer_cache_1_axes_0 = const()[name = string("K_layer_cache_1_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> K_layer_cache_1_cast_fp16 = squeeze(axes = K_layer_cache_1_axes_0, x = var_256_cast_fp16)[name = string("K_layer_cache_1_cast_fp16")];
            tensor<int32, [4]> var_258_begin_0 = const()[name = string("op_258_begin_0"), val = tensor<int32, [4]>([44, 0, 0, 0])];
            tensor<int32, [4]> var_258_end_0 = const()[name = string("op_258_end_0"), val = tensor<int32, [4]>([45, 8, 1024, 128])];
            tensor<bool, [4]> var_258_end_mask_0 = const()[name = string("op_258_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_258_cast_fp16 = slice_by_index(begin = var_258_begin_0, end = var_258_end_0, end_mask = var_258_end_mask_0, x = coreml_update_state_9)[name = string("op_258_cast_fp16")];
            tensor<int32, [1]> V_layer_cache_1_axes_0 = const()[name = string("V_layer_cache_1_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> V_layer_cache_1_cast_fp16 = squeeze(axes = V_layer_cache_1_axes_0, x = var_258_cast_fp16)[name = string("V_layer_cache_1_cast_fp16")];
            tensor<int32, [1]> x_11_axes_0 = const()[name = string("x_11_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_11_cast_fp16 = expand_dims(axes = x_11_axes_0, x = K_layer_cache_1_cast_fp16)[name = string("x_11_cast_fp16")];
            tensor<int32, [4]> var_267 = const()[name = string("op_267"), val = tensor<int32, [4]>([1, 4, 1, 1])];
            tensor<fp16, [8, 4, 1024, 128]> x_13_cast_fp16 = tile(reps = var_267, x = x_11_cast_fp16)[name = string("x_13_cast_fp16")];
            tensor<int32, [4]> var_271 = const()[name = string("op_271"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
            tensor<fp16, [1, 32, 1024, 128]> key_states_3_cast_fp16 = reshape(shape = var_271, x = x_13_cast_fp16)[name = string("key_states_3_cast_fp16")];
            tensor<int32, [1]> x_17_axes_0 = const()[name = string("x_17_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_17_cast_fp16 = expand_dims(axes = x_17_axes_0, x = V_layer_cache_1_cast_fp16)[name = string("x_17_cast_fp16")];
            tensor<int32, [4]> var_274 = const()[name = string("op_274"), val = tensor<int32, [4]>([1, 4, 1, 1])];
            tensor<fp16, [8, 4, 1024, 128]> x_19_cast_fp16 = tile(reps = var_274, x = x_17_cast_fp16)[name = string("x_19_cast_fp16")];
            tensor<int32, [4]> var_278 = const()[name = string("op_278"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
            tensor<fp16, [1, 32, 1024, 128]> value_states_3_cast_fp16 = reshape(shape = var_278, x = x_19_cast_fp16)[name = string("value_states_3_cast_fp16")];
            bool var_281_transpose_x_1 = const()[name = string("op_281_transpose_x_1"), val = bool(false)];
            bool var_281_transpose_y_1 = const()[name = string("op_281_transpose_y_1"), val = bool(true)];
            tensor<fp16, [1, 32, 1, 1024]> var_281_cast_fp16 = matmul(transpose_x = var_281_transpose_x_1, transpose_y = var_281_transpose_y_1, x = rotated_1_cast_fp16, y = key_states_3_cast_fp16)[name = string("op_281_cast_fp16")];
            fp16 var_282_to_fp16 = const()[name = string("op_282_to_fp16"), val = fp16(0x1.6ap-4)];
            tensor<fp16, [1, 32, 1, 1024]> attn_weights_1_cast_fp16 = mul(x = var_281_cast_fp16, y = var_282_to_fp16)[name = string("attn_weights_1_cast_fp16")];
            tensor<fp16, [1, 32, 1, 1024]> x_21_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("x_21_cast_fp16")];
            tensor<int32, [1]> reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor<int32, [1]>([-1])];
            bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 32, 1, 1]> reduce_max_0_cast_fp16 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_21_cast_fp16)[name = string("reduce_max_0_cast_fp16")];
            tensor<fp16, [1, 32, 1, 1024]> x_23_cast_fp16 = sub(x = x_21_cast_fp16, y = reduce_max_0_cast_fp16)[name = string("x_23_cast_fp16")];
            tensor<fp16, [1, 32, 1, 1024]> exp_x_1_cast_fp16 = exp(x = x_23_cast_fp16)[name = string("exp_x_1_cast_fp16")];
            tensor<int32, [1]> var_293_axes_0 = const()[name = string("op_293_axes_0"), val = tensor<int32, [1]>([-1])];
            bool var_293_keep_dims_0 = const()[name = string("op_293_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 32, 1, 1]> var_293_cast_fp16 = reduce_sum(axes = var_293_axes_0, keep_dims = var_293_keep_dims_0, x = exp_x_1_cast_fp16)[name = string("op_293_cast_fp16")];
            tensor<fp16, [1, 32, 1, 1024]> attn_weights_3_cast_fp16 = real_div(x = exp_x_1_cast_fp16, y = var_293_cast_fp16)[name = string("attn_weights_3_cast_fp16")];
            bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)];
            bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)];
            tensor<fp16, [1, 32, 1, 128]> attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = attn_weights_3_cast_fp16, y = value_states_3_cast_fp16)[name = string("attn_output_1_cast_fp16")];
            tensor<int32, [4]> var_296_perm_0 = const()[name = string("op_296_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_298 = const()[name = string("op_298"), val = tensor<int32, [3]>([1, 1, 4096])];
            tensor<fp16, [1, 1, 32, 128]> var_296_cast_fp16 = transpose(perm = var_296_perm_0, x = attn_output_1_cast_fp16)[name = string("transpose_14")];
            tensor<fp16, [1, 1, 4096]> input_5_cast_fp16 = reshape(shape = var_298, x = var_296_cast_fp16)[name = string("input_5_cast_fp16")];
            tensor<fp16, [4096, 4096]> model_model_layers_12_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4096, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(673590528))), lut = tensor<fp16, [512, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686173504))))[name = string("model_model_layers_12_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
            tensor<fp16, [4096]> linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686239104)))];
            tensor<fp16, [1, 1, 4096]> linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_12_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_5_cast_fp16)[name = string("linear_0_cast_fp16")];
            tensor<fp16, [1, 1, 4096]> hidden_states_5_cast_fp16 = add(x = hidden_states, y = linear_0_cast_fp16)[name = string("hidden_states_5_cast_fp16")];
            tensor<int32, [1]> mean_3_axes_0 = const()[name = string("mean_3_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_3_keep_dims_0 = const()[name = string("mean_3_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 1, 1]> mean_3_cast_fp16 = reduce_mean(axes = mean_3_axes_0, keep_dims = mean_3_keep_dims_0, x = hidden_states_5_cast_fp16)[name = string("mean_3_cast_fp16")];
            tensor<fp16, [1, 1, 4096]> input_7_cast_fp16 = sub(x = hidden_states_5_cast_fp16, y = mean_3_cast_fp16)[name = string("input_7_cast_fp16")];
            tensor<int32, [1]> var_309_axes_0 = const()[name = string("op_309_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [4096]> model_model_layers_12_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686247360)))];
            tensor<fp16, [1, 1, 4096]> var_309_cast_fp16 = layer_norm(axes = var_309_axes_0, epsilon = var_36_to_fp16, gamma = model_model_layers_12_post_attention_layernorm_weight_to_fp16, x = input_7_cast_fp16)[name = string("op_309_cast_fp16")];
            tensor<int32, [3]> var_316 = const()[name = string("op_316"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> input_9_axes_0 = const()[name = string("input_9_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 4096, 1]> var_317 = transpose(perm = var_316, x = var_309_cast_fp16)[name = string("transpose_13")];
            tensor<fp16, [1, 4096, 1, 1]> input_9 = expand_dims(axes = input_9_axes_0, x = var_317)[name = string("input_9")];
            string input_11_pad_type_0 = const()[name = string("input_11_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> input_11_strides_0 = const()[name = string("input_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> input_11_pad_0 = const()[name = string("input_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> input_11_dilations_0 = const()[name = string("input_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 input_11_groups_0 = const()[name = string("input_11_groups_0"), val = int32(1)];
            tensor<fp16, [1, 14336, 1, 1]> input_11 = conv(dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = model_model_layers_12_mlp_gate_proj_weight_palettized, x = input_9)[name = string("input_11")];
            string up_states_1_pad_type_0 = const()[name = string("up_states_1_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> up_states_1_strides_0 = const()[name = string("up_states_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> up_states_1_pad_0 = const()[name = string("up_states_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> up_states_1_dilations_0 = const()[name = string("up_states_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 up_states_1_groups_0 = const()[name = string("up_states_1_groups_0"), val = int32(1)];
            tensor<fp16, [1, 14336, 1, 1]> up_states_1 = conv(dilations = up_states_1_dilations_0, groups = up_states_1_groups_0, pad = up_states_1_pad_0, pad_type = up_states_1_pad_type_0, strides = up_states_1_strides_0, weight = model_model_layers_12_mlp_up_proj_weight_palettized, x = input_9)[name = string("up_states_1")];
            tensor<fp16, [1, 14336, 1, 1]> gate_states_1 = silu(x = input_11)[name = string("gate_states_1")];
            tensor<fp16, [1, 14336, 1, 1]> input_13 = mul(x = gate_states_1, y = up_states_1)[name = string("input_13")];
            string hidden_states_7_pad_type_0 = const()[name = string("hidden_states_7_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> hidden_states_7_strides_0 = const()[name = string("hidden_states_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> hidden_states_7_pad_0 = const()[name = string("hidden_states_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> hidden_states_7_dilations_0 = const()[name = string("hidden_states_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 hidden_states_7_groups_0 = const()[name = string("hidden_states_7_groups_0"), val = int32(1)];
            tensor<fp16, [1, 4096, 1, 1]> hidden_states_7 = conv(dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = model_model_layers_12_mlp_down_proj_weight_palettized, x = input_13)[name = string("hidden_states_7")];
            tensor<int32, [1]> var_339_axes_0 = const()[name = string("op_339_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 4096, 1]> var_339 = squeeze(axes = var_339_axes_0, x = hidden_states_7)[name = string("op_339")];
            tensor<int32, [3]> var_340 = const()[name = string("op_340"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 4096]> var_341 = transpose(perm = var_340, x = var_339)[name = string("transpose_12")];
            tensor<fp16, [1, 1, 4096]> hidden_states_9_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = var_341)[name = string("hidden_states_9_cast_fp16")];
            tensor<int32, [1]> mean_5_axes_0 = const()[name = string("mean_5_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_5_keep_dims_0 = const()[name = string("mean_5_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 1, 1]> mean_5_cast_fp16 = reduce_mean(axes = mean_5_axes_0, keep_dims = mean_5_keep_dims_0, x = hidden_states_9_cast_fp16)[name = string("mean_5_cast_fp16")];
            tensor<fp16, [1, 1, 4096]> input_15_cast_fp16 = sub(x = hidden_states_9_cast_fp16, y = mean_5_cast_fp16)[name = string("input_15_cast_fp16")];
            tensor<int32, [1]> var_349_axes_0 = const()[name = string("op_349_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [4096]> model_model_layers_13_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_input_layernorm_weight_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686255616)))];
            tensor<fp16, [1, 1, 4096]> var_349_cast_fp16 = layer_norm(axes = var_349_axes_0, epsilon = var_36_to_fp16, gamma = model_model_layers_13_input_layernorm_weight_to_fp16, x = input_15_cast_fp16)[name = string("op_349_cast_fp16")];
            tensor<int32, [3]> var_352 = const()[name = string("op_352"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> var_354_axes_0 = const()[name = string("op_354_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 4096, 1]> var_353 = transpose(perm = var_352, x = var_349_cast_fp16)[name = string("transpose_11")];
            tensor<fp16, [1, 4096, 1, 1]> var_354 = expand_dims(axes = var_354_axes_0, x = var_353)[name = string("op_354")];
            string var_361_pad_type_0 = const()[name = string("op_361_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> var_361_strides_0 = const()[name = string("op_361_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> var_361_pad_0 = const()[name = string("op_361_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> var_361_dilations_0 = const()[name = string("op_361_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 var_361_groups_0 = const()[name = string("op_361_groups_0"), val = int32(1)];
            tensor<fp16, [1, 4096, 1, 1]> var_361 = conv(dilations = var_361_dilations_0, groups = var_361_groups_0, pad = var_361_pad_0, pad_type = var_361_pad_type_0, strides = var_361_strides_0, weight = model_model_layers_13_self_attn_q_proj_weight_palettized, x = var_354)[name = string("op_361")];
            tensor<int32, [4]> var_362 = const()[name = string("op_362"), val = tensor<int32, [4]>([1, 32, 1, 128])];
            tensor<fp16, [1, 32, 1, 128]> var_363 = reshape(shape = var_362, x = var_361)[name = string("op_363")];
            string var_370_pad_type_0 = const()[name = string("op_370_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> var_370_strides_0 = const()[name = string("op_370_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> var_370_pad_0 = const()[name = string("op_370_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> var_370_dilations_0 = const()[name = string("op_370_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 var_370_groups_0 = const()[name = string("op_370_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 1]> var_370 = conv(dilations = var_370_dilations_0, groups = var_370_groups_0, pad = var_370_pad_0, pad_type = var_370_pad_type_0, strides = var_370_strides_0, weight = model_model_layers_13_self_attn_k_proj_weight_palettized, x = var_354)[name = string("op_370")];
            tensor<int32, [4]> var_371 = const()[name = string("op_371"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<fp16, [1, 8, 1, 128]> var_372 = reshape(shape = var_371, x = var_370)[name = string("op_372")];
            string var_379_pad_type_0 = const()[name = string("op_379_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> var_379_strides_0 = const()[name = string("op_379_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> var_379_pad_0 = const()[name = string("op_379_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> var_379_dilations_0 = const()[name = string("op_379_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 var_379_groups_0 = const()[name = string("op_379_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 1]> var_379 = conv(dilations = var_379_dilations_0, groups = var_379_groups_0, pad = var_379_pad_0, pad_type = var_379_pad_type_0, strides = var_379_strides_0, weight = model_model_layers_13_self_attn_v_proj_weight_palettized, x = var_354)[name = string("op_379")];
            tensor<int32, [4]> var_380 = const()[name = string("op_380"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<fp16, [1, 8, 1, 128]> var_381 = reshape(shape = var_380, x = var_379)[name = string("op_381")];
            tensor<int32, [4]> x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor<int32, [4]>([1, 32, 1, 64])];
            tensor<bool, [4]> x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 32, 1, 64]> x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = var_363)[name = string("x1_5")];
            tensor<int32, [4]> x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor<int32, [4]>([1, 32, 1, 128])];
            tensor<bool, [4]> x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 32, 1, 64]> x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = var_363)[name = string("x2_5")];
            tensor<fp16, [1, 32, 1, 64]> var_395_cast_fp16 = mul(x = x1_5, y = cos_3_cast_fp16)[name = string("op_395_cast_fp16")];
            tensor<fp16, [1, 32, 1, 64]> var_396_cast_fp16 = mul(x = x2_5, y = sin_3_cast_fp16)[name = string("op_396_cast_fp16")];
            tensor<fp16, [1, 32, 1, 64]> var_397_cast_fp16 = sub(x = var_395_cast_fp16, y = var_396_cast_fp16)[name = string("op_397_cast_fp16")];
            tensor<fp16, [1, 32, 1, 64]> var_398_cast_fp16 = mul(x = x2_5, y = cos_3_cast_fp16)[name = string("op_398_cast_fp16")];
            tensor<fp16, [1, 32, 1, 64]> var_399_cast_fp16 = mul(x = x1_5, y = sin_3_cast_fp16)[name = string("op_399_cast_fp16")];
            tensor<fp16, [1, 32, 1, 64]> var_400_cast_fp16 = add(x = var_398_cast_fp16, y = var_399_cast_fp16)[name = string("op_400_cast_fp16")];
            bool rotated_5_interleave_0 = const()[name = string("rotated_5_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 32, 1, 128]> rotated_5_cast_fp16 = concat(axis = var_41, interleave = rotated_5_interleave_0, values = (var_397_cast_fp16, var_400_cast_fp16))[name = string("rotated_5_cast_fp16")];
            tensor<int32, [4]> x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor<int32, [4]>([1, 8, 1, 64])];
            tensor<bool, [4]> x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 8, 1, 64]> x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = var_372)[name = string("x1_7")];
            tensor<int32, [4]> x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<bool, [4]> x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 8, 1, 64]> x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = var_372)[name = string("x2_7")];
            tensor<fp16, [1, 8, 1, 64]> var_416_cast_fp16 = mul(x = x1_7, y = cos_3_cast_fp16)[name = string("op_416_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_417_cast_fp16 = mul(x = x2_7, y = sin_3_cast_fp16)[name = string("op_417_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_418_cast_fp16 = sub(x = var_416_cast_fp16, y = var_417_cast_fp16)[name = string("op_418_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_419_cast_fp16 = mul(x = x2_7, y = cos_3_cast_fp16)[name = string("op_419_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_420_cast_fp16 = mul(x = x1_7, y = sin_3_cast_fp16)[name = string("op_420_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_421_cast_fp16 = add(x = var_419_cast_fp16, y = var_420_cast_fp16)[name = string("op_421_cast_fp16")];
            bool rotated_7_interleave_0 = const()[name = string("rotated_7_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 8, 1, 128]> rotated_7_cast_fp16 = concat(axis = var_41, interleave = rotated_7_interleave_0, values = (var_418_cast_fp16, var_421_cast_fp16))[name = string("rotated_7_cast_fp16")];
            tensor<int32, [1]> expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor<int32, [1]>([13])];
            tensor<int32, [1]> expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor<int32, [1]>([14])];
            int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)];
            bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_10")];
            tensor<int32, [1]> concat_11_values1_0 = const()[name = string("concat_11_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)];
            bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (expand_dims_16, concat_11_values1_0, var_241, concat_11_values3_0))[name = string("concat_11")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [64, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_10, begin_mask = model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_11, end_mask = model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_3_stride_0, update = rotated_7_cast_fp16, x = coreml_update_state_9)[name = string("model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_2_write_state")];
            tensor<fp16, [64, 8, 1024, 128]> coreml_update_state_10 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_2")];
            tensor<int32, [1]> expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor<int32, [1]>([45])];
            tensor<int32, [1]> expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor<int32, [1]>([46])];
            int32 concat_14_axis_0 = const()[name = string("concat_14_axis_0"), val = int32(0)];
            bool concat_14_interleave_0 = const()[name = string("concat_14_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_14 = concat(axis = concat_14_axis_0, interleave = concat_14_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_14")];
            tensor<int32, [1]> concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_15_values3_0 = const()[name = string("concat_15_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)];
            bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (expand_dims_22, concat_15_values1_0, var_241, concat_15_values3_0))[name = string("concat_15")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [64, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_14, begin_mask = model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_15, end_mask = model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_4_stride_0, update = var_381, x = coreml_update_state_10)[name = string("model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_3_write_state")];
            tensor<fp16, [64, 8, 1024, 128]> coreml_update_state_11 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_3")];
            tensor<int32, [4]> var_441_begin_0 = const()[name = string("op_441_begin_0"), val = tensor<int32, [4]>([13, 0, 0, 0])];
            tensor<int32, [4]> var_441_end_0 = const()[name = string("op_441_end_0"), val = tensor<int32, [4]>([14, 8, 1024, 128])];
            tensor<bool, [4]> var_441_end_mask_0 = const()[name = string("op_441_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_441_cast_fp16 = slice_by_index(begin = var_441_begin_0, end = var_441_end_0, end_mask = var_441_end_mask_0, x = coreml_update_state_11)[name = string("op_441_cast_fp16")];
            tensor<int32, [1]> K_layer_cache_3_axes_0 = const()[name = string("K_layer_cache_3_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> K_layer_cache_3_cast_fp16 = squeeze(axes = K_layer_cache_3_axes_0, x = var_441_cast_fp16)[name = string("K_layer_cache_3_cast_fp16")];
            tensor<int32, [4]> var_443_begin_0 = const()[name = string("op_443_begin_0"), val = tensor<int32, [4]>([45, 0, 0, 0])];
            tensor<int32, [4]> var_443_end_0 = const()[name = string("op_443_end_0"), val = tensor<int32, [4]>([46, 8, 1024, 128])];
            tensor<bool, [4]> var_443_end_mask_0 = const()[name = string("op_443_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_443_cast_fp16 = slice_by_index(begin = var_443_begin_0, end = var_443_end_0, end_mask = var_443_end_mask_0, x = coreml_update_state_11)[name = string("op_443_cast_fp16")];
            tensor<int32, [1]> V_layer_cache_3_axes_0 = const()[name = string("V_layer_cache_3_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> V_layer_cache_3_cast_fp16 = squeeze(axes = V_layer_cache_3_axes_0, x = var_443_cast_fp16)[name = string("V_layer_cache_3_cast_fp16")];
            tensor<int32, [1]> x_39_axes_0 = const()[name = string("x_39_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_39_cast_fp16 = expand_dims(axes = x_39_axes_0, x = K_layer_cache_3_cast_fp16)[name = string("x_39_cast_fp16")];
            tensor<int32, [4]> var_452 = const()[name = string("op_452"), val = tensor<int32, [4]>([1, 4, 1, 1])];
            tensor<fp16, [8, 4, 1024, 128]> x_41_cast_fp16 = tile(reps = var_452, x = x_39_cast_fp16)[name = string("x_41_cast_fp16")];
            tensor<int32, [4]> var_456 = const()[name = string("op_456"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
            tensor<fp16, [1, 32, 1024, 128]> key_states_7_cast_fp16 = reshape(shape = var_456, x = x_41_cast_fp16)[name = string("key_states_7_cast_fp16")];
            tensor<int32, [1]> x_45_axes_0 = const()[name = string("x_45_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_45_cast_fp16 = expand_dims(axes = x_45_axes_0, x = V_layer_cache_3_cast_fp16)[name = string("x_45_cast_fp16")];
            tensor<int32, [4]> var_459 = const()[name = string("op_459"), val = tensor<int32, [4]>([1, 4, 1, 1])];
            tensor<fp16, [8, 4, 1024, 128]> x_47_cast_fp16 = tile(reps = var_459, x = x_45_cast_fp16)[name = string("x_47_cast_fp16")];
            tensor<int32, [4]> var_463 = const()[name = string("op_463"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
            tensor<fp16, [1, 32, 1024, 128]> value_states_7_cast_fp16 = reshape(shape = var_463, x = x_47_cast_fp16)[name = string("value_states_7_cast_fp16")];
            bool var_466_transpose_x_1 = const()[name = string("op_466_transpose_x_1"), val = bool(false)];
            bool var_466_transpose_y_1 = const()[name = string("op_466_transpose_y_1"), val = bool(true)];
            tensor<fp16, [1, 32, 1, 1024]> var_466_cast_fp16 = matmul(transpose_x = var_466_transpose_x_1, transpose_y = var_466_transpose_y_1, x = rotated_5_cast_fp16, y = key_states_7_cast_fp16)[name = string("op_466_cast_fp16")];
            fp16 var_467_to_fp16 = const()[name = string("op_467_to_fp16"), val = fp16(0x1.6ap-4)];
            tensor<fp16, [1, 32, 1, 1024]> attn_weights_5_cast_fp16 = mul(x = var_466_cast_fp16, y = var_467_to_fp16)[name = string("attn_weights_5_cast_fp16")];
            tensor<fp16, [1, 32, 1, 1024]> x_49_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask)[name = string("x_49_cast_fp16")];
            tensor<int32, [1]> reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor<int32, [1]>([-1])];
            bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 32, 1, 1]> reduce_max_1_cast_fp16 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_49_cast_fp16)[name = string("reduce_max_1_cast_fp16")];
            tensor<fp16, [1, 32, 1, 1024]> x_51_cast_fp16 = sub(x = x_49_cast_fp16, y = reduce_max_1_cast_fp16)[name = string("x_51_cast_fp16")];
            tensor<fp16, [1, 32, 1, 1024]> exp_x_3_cast_fp16 = exp(x = x_51_cast_fp16)[name = string("exp_x_3_cast_fp16")];
            tensor<int32, [1]> var_478_axes_0 = const()[name = string("op_478_axes_0"), val = tensor<int32, [1]>([-1])];
            bool var_478_keep_dims_0 = const()[name = string("op_478_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 32, 1, 1]> var_478_cast_fp16 = reduce_sum(axes = var_478_axes_0, keep_dims = var_478_keep_dims_0, x = exp_x_3_cast_fp16)[name = string("op_478_cast_fp16")];
            tensor<fp16, [1, 32, 1, 1024]> attn_weights_7_cast_fp16 = real_div(x = exp_x_3_cast_fp16, y = var_478_cast_fp16)[name = string("attn_weights_7_cast_fp16")];
            bool attn_output_7_transpose_x_0 = const()[name = string("attn_output_7_transpose_x_0"), val = bool(false)];
            bool attn_output_7_transpose_y_0 = const()[name = string("attn_output_7_transpose_y_0"), val = bool(false)];
            tensor<fp16, [1, 32, 1, 128]> attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_0, transpose_y = attn_output_7_transpose_y_0, x = attn_weights_7_cast_fp16, y = value_states_7_cast_fp16)[name = string("attn_output_7_cast_fp16")];
            tensor<int32, [4]> var_481_perm_0 = const()[name = string("op_481_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_483 = const()[name = string("op_483"), val = tensor<int32, [3]>([1, 1, 4096])];
            tensor<fp16, [1, 1, 32, 128]> var_481_cast_fp16 = transpose(perm = var_481_perm_0, x = attn_output_7_cast_fp16)[name = string("transpose_10")];
            tensor<fp16, [1, 1, 4096]> input_19_cast_fp16 = reshape(shape = var_483, x = var_481_cast_fp16)[name = string("input_19_cast_fp16")];
            tensor<fp16, [4096, 4096]> model_model_layers_13_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4096, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686263872))), lut = tensor<fp16, [512, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698846848))))[name = string("model_model_layers_13_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
            tensor<fp16, [1, 1, 4096]> linear_1_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_13_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_19_cast_fp16)[name = string("linear_1_cast_fp16")];
            tensor<fp16, [1, 1, 4096]> hidden_states_13_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = linear_1_cast_fp16)[name = string("hidden_states_13_cast_fp16")];
            tensor<int32, [1]> mean_7_axes_0 = const()[name = string("mean_7_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_7_keep_dims_0 = const()[name = string("mean_7_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 1, 1]> mean_7_cast_fp16 = reduce_mean(axes = mean_7_axes_0, keep_dims = mean_7_keep_dims_0, x = hidden_states_13_cast_fp16)[name = string("mean_7_cast_fp16")];
            tensor<fp16, [1, 1, 4096]> input_21_cast_fp16 = sub(x = hidden_states_13_cast_fp16, y = mean_7_cast_fp16)[name = string("input_21_cast_fp16")];
            tensor<int32, [1]> var_494_axes_0 = const()[name = string("op_494_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [4096]> model_model_layers_13_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698912448)))];
            tensor<fp16, [1, 1, 4096]> var_494_cast_fp16 = layer_norm(axes = var_494_axes_0, epsilon = var_36_to_fp16, gamma = model_model_layers_13_post_attention_layernorm_weight_to_fp16, x = input_21_cast_fp16)[name = string("op_494_cast_fp16")];
            tensor<int32, [3]> var_501 = const()[name = string("op_501"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> input_23_axes_0 = const()[name = string("input_23_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 4096, 1]> var_502 = transpose(perm = var_501, x = var_494_cast_fp16)[name = string("transpose_9")];
            tensor<fp16, [1, 4096, 1, 1]> input_23 = expand_dims(axes = input_23_axes_0, x = var_502)[name = string("input_23")];
            string input_25_pad_type_0 = const()[name = string("input_25_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> input_25_strides_0 = const()[name = string("input_25_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> input_25_pad_0 = const()[name = string("input_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> input_25_dilations_0 = const()[name = string("input_25_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 input_25_groups_0 = const()[name = string("input_25_groups_0"), val = int32(1)];
            tensor<fp16, [1, 14336, 1, 1]> input_25 = conv(dilations = input_25_dilations_0, groups = input_25_groups_0, pad = input_25_pad_0, pad_type = input_25_pad_type_0, strides = input_25_strides_0, weight = model_model_layers_13_mlp_gate_proj_weight_palettized, x = input_23)[name = string("input_25")];
            string up_states_3_pad_type_0 = const()[name = string("up_states_3_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> up_states_3_strides_0 = const()[name = string("up_states_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> up_states_3_pad_0 = const()[name = string("up_states_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> up_states_3_dilations_0 = const()[name = string("up_states_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 up_states_3_groups_0 = const()[name = string("up_states_3_groups_0"), val = int32(1)];
            tensor<fp16, [1, 14336, 1, 1]> up_states_3 = conv(dilations = up_states_3_dilations_0, groups = up_states_3_groups_0, pad = up_states_3_pad_0, pad_type = up_states_3_pad_type_0, strides = up_states_3_strides_0, weight = model_model_layers_13_mlp_up_proj_weight_palettized, x = input_23)[name = string("up_states_3")];
            tensor<fp16, [1, 14336, 1, 1]> gate_states_3 = silu(x = input_25)[name = string("gate_states_3")];
            tensor<fp16, [1, 14336, 1, 1]> input_27 = mul(x = gate_states_3, y = up_states_3)[name = string("input_27")];
            string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)];
            tensor<fp16, [1, 4096, 1, 1]> hidden_states_15 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = model_model_layers_13_mlp_down_proj_weight_palettized, x = input_27)[name = string("hidden_states_15")];
            tensor<int32, [1]> var_524_axes_0 = const()[name = string("op_524_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 4096, 1]> var_524 = squeeze(axes = var_524_axes_0, x = hidden_states_15)[name = string("op_524")];
            tensor<int32, [3]> var_525 = const()[name = string("op_525"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 4096]> var_526 = transpose(perm = var_525, x = var_524)[name = string("transpose_8")];
            tensor<fp16, [1, 1, 4096]> hidden_states_17_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = var_526)[name = string("hidden_states_17_cast_fp16")];
            tensor<int32, [1]> mean_9_axes_0 = const()[name = string("mean_9_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_9_keep_dims_0 = const()[name = string("mean_9_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 1, 1]> mean_9_cast_fp16 = reduce_mean(axes = mean_9_axes_0, keep_dims = mean_9_keep_dims_0, x = hidden_states_17_cast_fp16)[name = string("mean_9_cast_fp16")];
            tensor<fp16, [1, 1, 4096]> input_29_cast_fp16 = sub(x = hidden_states_17_cast_fp16, y = mean_9_cast_fp16)[name = string("input_29_cast_fp16")];
            tensor<int32, [1]> var_534_axes_0 = const()[name = string("op_534_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [4096]> model_model_layers_14_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_input_layernorm_weight_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698920704)))];
            tensor<fp16, [1, 1, 4096]> var_534_cast_fp16 = layer_norm(axes = var_534_axes_0, epsilon = var_36_to_fp16, gamma = model_model_layers_14_input_layernorm_weight_to_fp16, x = input_29_cast_fp16)[name = string("op_534_cast_fp16")];
            tensor<int32, [3]> var_537 = const()[name = string("op_537"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> var_539_axes_0 = const()[name = string("op_539_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 4096, 1]> var_538 = transpose(perm = var_537, x = var_534_cast_fp16)[name = string("transpose_7")];
            tensor<fp16, [1, 4096, 1, 1]> var_539 = expand_dims(axes = var_539_axes_0, x = var_538)[name = string("op_539")];
            string var_546_pad_type_0 = const()[name = string("op_546_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> var_546_strides_0 = const()[name = string("op_546_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> var_546_pad_0 = const()[name = string("op_546_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> var_546_dilations_0 = const()[name = string("op_546_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 var_546_groups_0 = const()[name = string("op_546_groups_0"), val = int32(1)];
            tensor<fp16, [1, 4096, 1, 1]> var_546 = conv(dilations = var_546_dilations_0, groups = var_546_groups_0, pad = var_546_pad_0, pad_type = var_546_pad_type_0, strides = var_546_strides_0, weight = model_model_layers_14_self_attn_q_proj_weight_palettized, x = var_539)[name = string("op_546")];
            tensor<int32, [4]> var_547 = const()[name = string("op_547"), val = tensor<int32, [4]>([1, 32, 1, 128])];
            tensor<fp16, [1, 32, 1, 128]> var_548 = reshape(shape = var_547, x = var_546)[name = string("op_548")];
            string var_555_pad_type_0 = const()[name = string("op_555_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> var_555_strides_0 = const()[name = string("op_555_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> var_555_pad_0 = const()[name = string("op_555_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> var_555_dilations_0 = const()[name = string("op_555_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 var_555_groups_0 = const()[name = string("op_555_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 1]> var_555 = conv(dilations = var_555_dilations_0, groups = var_555_groups_0, pad = var_555_pad_0, pad_type = var_555_pad_type_0, strides = var_555_strides_0, weight = model_model_layers_14_self_attn_k_proj_weight_palettized, x = var_539)[name = string("op_555")];
            tensor<int32, [4]> var_556 = const()[name = string("op_556"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<fp16, [1, 8, 1, 128]> var_557 = reshape(shape = var_556, x = var_555)[name = string("op_557")];
            string var_564_pad_type_0 = const()[name = string("op_564_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> var_564_strides_0 = const()[name = string("op_564_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> var_564_pad_0 = const()[name = string("op_564_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> var_564_dilations_0 = const()[name = string("op_564_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 var_564_groups_0 = const()[name = string("op_564_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 1]> var_564 = conv(dilations = var_564_dilations_0, groups = var_564_groups_0, pad = var_564_pad_0, pad_type = var_564_pad_type_0, strides = var_564_strides_0, weight = model_model_layers_14_self_attn_v_proj_weight_palettized, x = var_539)[name = string("op_564")];
            tensor<int32, [4]> var_565 = const()[name = string("op_565"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<fp16, [1, 8, 1, 128]> var_566 = reshape(shape = var_565, x = var_564)[name = string("op_566")];
            tensor<int32, [4]> x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor<int32, [4]>([1, 32, 1, 64])];
            tensor<bool, [4]> x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 32, 1, 64]> x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = var_548)[name = string("x1_9")];
            tensor<int32, [4]> x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor<int32, [4]>([1, 32, 1, 128])];
            tensor<bool, [4]> x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 32, 1, 64]> x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = var_548)[name = string("x2_9")];
            tensor<fp16, [1, 32, 1, 64]> var_580_cast_fp16 = mul(x = x1_9, y = cos_3_cast_fp16)[name = string("op_580_cast_fp16")];
            tensor<fp16, [1, 32, 1, 64]> var_581_cast_fp16 = mul(x = x2_9, y = sin_3_cast_fp16)[name = string("op_581_cast_fp16")];
            tensor<fp16, [1, 32, 1, 64]> var_582_cast_fp16 = sub(x = var_580_cast_fp16, y = var_581_cast_fp16)[name = string("op_582_cast_fp16")];
            tensor<fp16, [1, 32, 1, 64]> var_583_cast_fp16 = mul(x = x2_9, y = cos_3_cast_fp16)[name = string("op_583_cast_fp16")];
            tensor<fp16, [1, 32, 1, 64]> var_584_cast_fp16 = mul(x = x1_9, y = sin_3_cast_fp16)[name = string("op_584_cast_fp16")];
            tensor<fp16, [1, 32, 1, 64]> var_585_cast_fp16 = add(x = var_583_cast_fp16, y = var_584_cast_fp16)[name = string("op_585_cast_fp16")];
            bool rotated_9_interleave_0 = const()[name = string("rotated_9_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 32, 1, 128]> rotated_9_cast_fp16 = concat(axis = var_41, interleave = rotated_9_interleave_0, values = (var_582_cast_fp16, var_585_cast_fp16))[name = string("rotated_9_cast_fp16")];
            tensor<int32, [4]> x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor<int32, [4]>([1, 8, 1, 64])];
            tensor<bool, [4]> x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 8, 1, 64]> x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = var_557)[name = string("x1_11")];
            tensor<int32, [4]> x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<bool, [4]> x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 8, 1, 64]> x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = var_557)[name = string("x2_11")];
            tensor<fp16, [1, 8, 1, 64]> var_601_cast_fp16 = mul(x = x1_11, y = cos_3_cast_fp16)[name = string("op_601_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_602_cast_fp16 = mul(x = x2_11, y = sin_3_cast_fp16)[name = string("op_602_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_603_cast_fp16 = sub(x = var_601_cast_fp16, y = var_602_cast_fp16)[name = string("op_603_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_604_cast_fp16 = mul(x = x2_11, y = cos_3_cast_fp16)[name = string("op_604_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_605_cast_fp16 = mul(x = x1_11, y = sin_3_cast_fp16)[name = string("op_605_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_606_cast_fp16 = add(x = var_604_cast_fp16, y = var_605_cast_fp16)[name = string("op_606_cast_fp16")];
            bool rotated_11_interleave_0 = const()[name = string("rotated_11_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 8, 1, 128]> rotated_11_cast_fp16 = concat(axis = var_41, interleave = rotated_11_interleave_0, values = (var_603_cast_fp16, var_606_cast_fp16))[name = string("rotated_11_cast_fp16")];
            tensor<int32, [1]> expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor<int32, [1]>([14])];
            tensor<int32, [1]> expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor<int32, [1]>([15])];
            int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)];
            bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_18")];
            tensor<int32, [1]> concat_19_values1_0 = const()[name = string("concat_19_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_19_values3_0 = const()[name = string("concat_19_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_19_axis_0 = const()[name = string("concat_19_axis_0"), val = int32(0)];
            bool concat_19_interleave_0 = const()[name = string("concat_19_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_19 = concat(axis = concat_19_axis_0, interleave = concat_19_interleave_0, values = (expand_dims_28, concat_19_values1_0, var_241, concat_19_values3_0))[name = string("concat_19")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [64, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_18, begin_mask = model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_19, end_mask = model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_5_stride_0, update = rotated_11_cast_fp16, x = coreml_update_state_11)[name = string("model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_4_write_state")];
            tensor<fp16, [64, 8, 1024, 128]> coreml_update_state_12 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_4")];
            tensor<int32, [1]> expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor<int32, [1]>([46])];
            tensor<int32, [1]> expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor<int32, [1]>([47])];
            int32 concat_22_axis_0 = const()[name = string("concat_22_axis_0"), val = int32(0)];
            bool concat_22_interleave_0 = const()[name = string("concat_22_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_22 = concat(axis = concat_22_axis_0, interleave = concat_22_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_22")];
            tensor<int32, [1]> concat_23_values1_0 = const()[name = string("concat_23_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_23_values3_0 = const()[name = string("concat_23_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_23_axis_0 = const()[name = string("concat_23_axis_0"), val = int32(0)];
            bool concat_23_interleave_0 = const()[name = string("concat_23_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_23 = concat(axis = concat_23_axis_0, interleave = concat_23_interleave_0, values = (expand_dims_34, concat_23_values1_0, var_241, concat_23_values3_0))[name = string("concat_23")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [64, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_22, begin_mask = model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_23, end_mask = model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_6_stride_0, update = var_566, x = coreml_update_state_12)[name = string("model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_5_write_state")];
            tensor<fp16, [64, 8, 1024, 128]> coreml_update_state_13 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_5")];
            tensor<int32, [4]> var_626_begin_0 = const()[name = string("op_626_begin_0"), val = tensor<int32, [4]>([14, 0, 0, 0])];
            tensor<int32, [4]> var_626_end_0 = const()[name = string("op_626_end_0"), val = tensor<int32, [4]>([15, 8, 1024, 128])];
            tensor<bool, [4]> var_626_end_mask_0 = const()[name = string("op_626_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_626_cast_fp16 = slice_by_index(begin = var_626_begin_0, end = var_626_end_0, end_mask = var_626_end_mask_0, x = coreml_update_state_13)[name = string("op_626_cast_fp16")];
            tensor<int32, [1]> K_layer_cache_5_axes_0 = const()[name = string("K_layer_cache_5_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> K_layer_cache_5_cast_fp16 = squeeze(axes = K_layer_cache_5_axes_0, x = var_626_cast_fp16)[name = string("K_layer_cache_5_cast_fp16")];
            tensor<int32, [4]> var_628_begin_0 = const()[name = string("op_628_begin_0"), val = tensor<int32, [4]>([46, 0, 0, 0])];
            tensor<int32, [4]> var_628_end_0 = const()[name = string("op_628_end_0"), val = tensor<int32, [4]>([47, 8, 1024, 128])];
            tensor<bool, [4]> var_628_end_mask_0 = const()[name = string("op_628_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_628_cast_fp16 = slice_by_index(begin = var_628_begin_0, end = var_628_end_0, end_mask = var_628_end_mask_0, x = coreml_update_state_13)[name = string("op_628_cast_fp16")];
            tensor<int32, [1]> V_layer_cache_5_axes_0 = const()[name = string("V_layer_cache_5_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> V_layer_cache_5_cast_fp16 = squeeze(axes = V_layer_cache_5_axes_0, x = var_628_cast_fp16)[name = string("V_layer_cache_5_cast_fp16")];
            tensor<int32, [1]> x_67_axes_0 = const()[name = string("x_67_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_67_cast_fp16 = expand_dims(axes = x_67_axes_0, x = K_layer_cache_5_cast_fp16)[name = string("x_67_cast_fp16")];
            tensor<int32, [4]> var_637 = const()[name = string("op_637"), val = tensor<int32, [4]>([1, 4, 1, 1])];
            tensor<fp16, [8, 4, 1024, 128]> x_69_cast_fp16 = tile(reps = var_637, x = x_67_cast_fp16)[name = string("x_69_cast_fp16")];
            tensor<int32, [4]> var_641 = const()[name = string("op_641"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
            tensor<fp16, [1, 32, 1024, 128]> key_states_11_cast_fp16 = reshape(shape = var_641, x = x_69_cast_fp16)[name = string("key_states_11_cast_fp16")];
            tensor<int32, [1]> x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = V_layer_cache_5_cast_fp16)[name = string("x_73_cast_fp16")];
            tensor<int32, [4]> var_644 = const()[name = string("op_644"), val = tensor<int32, [4]>([1, 4, 1, 1])];
            tensor<fp16, [8, 4, 1024, 128]> x_75_cast_fp16 = tile(reps = var_644, x = x_73_cast_fp16)[name = string("x_75_cast_fp16")];
            tensor<int32, [4]> var_648 = const()[name = string("op_648"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
            tensor<fp16, [1, 32, 1024, 128]> value_states_11_cast_fp16 = reshape(shape = var_648, x = x_75_cast_fp16)[name = string("value_states_11_cast_fp16")];
            bool var_651_transpose_x_1 = const()[name = string("op_651_transpose_x_1"), val = bool(false)];
            bool var_651_transpose_y_1 = const()[name = string("op_651_transpose_y_1"), val = bool(true)];
            tensor<fp16, [1, 32, 1, 1024]> var_651_cast_fp16 = matmul(transpose_x = var_651_transpose_x_1, transpose_y = var_651_transpose_y_1, x = rotated_9_cast_fp16, y = key_states_11_cast_fp16)[name = string("op_651_cast_fp16")];
            fp16 var_652_to_fp16 = const()[name = string("op_652_to_fp16"), val = fp16(0x1.6ap-4)];
            tensor<fp16, [1, 32, 1, 1024]> attn_weights_9_cast_fp16 = mul(x = var_651_cast_fp16, y = var_652_to_fp16)[name = string("attn_weights_9_cast_fp16")];
            tensor<fp16, [1, 32, 1, 1024]> x_77_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask)[name = string("x_77_cast_fp16")];
            tensor<int32, [1]> reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor<int32, [1]>([-1])];
            bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 32, 1, 1]> reduce_max_2_cast_fp16 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_77_cast_fp16)[name = string("reduce_max_2_cast_fp16")];
            tensor<fp16, [1, 32, 1, 1024]> x_79_cast_fp16 = sub(x = x_77_cast_fp16, y = reduce_max_2_cast_fp16)[name = string("x_79_cast_fp16")];
            tensor<fp16, [1, 32, 1, 1024]> exp_x_5_cast_fp16 = exp(x = x_79_cast_fp16)[name = string("exp_x_5_cast_fp16")];
            tensor<int32, [1]> var_663_axes_0 = const()[name = string("op_663_axes_0"), val = tensor<int32, [1]>([-1])];
            bool var_663_keep_dims_0 = const()[name = string("op_663_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 32, 1, 1]> var_663_cast_fp16 = reduce_sum(axes = var_663_axes_0, keep_dims = var_663_keep_dims_0, x = exp_x_5_cast_fp16)[name = string("op_663_cast_fp16")];
            tensor<fp16, [1, 32, 1, 1024]> attn_weights_11_cast_fp16 = real_div(x = exp_x_5_cast_fp16, y = var_663_cast_fp16)[name = string("attn_weights_11_cast_fp16")];
            bool attn_output_13_transpose_x_0 = const()[name = string("attn_output_13_transpose_x_0"), val = bool(false)];
            bool attn_output_13_transpose_y_0 = const()[name = string("attn_output_13_transpose_y_0"), val = bool(false)];
            tensor<fp16, [1, 32, 1, 128]> attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_0, transpose_y = attn_output_13_transpose_y_0, x = attn_weights_11_cast_fp16, y = value_states_11_cast_fp16)[name = string("attn_output_13_cast_fp16")];
            tensor<int32, [4]> var_666_perm_0 = const()[name = string("op_666_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_668 = const()[name = string("op_668"), val = tensor<int32, [3]>([1, 1, 4096])];
            tensor<fp16, [1, 1, 32, 128]> var_666_cast_fp16 = transpose(perm = var_666_perm_0, x = attn_output_13_cast_fp16)[name = string("transpose_6")];
            tensor<fp16, [1, 1, 4096]> input_33_cast_fp16 = reshape(shape = var_668, x = var_666_cast_fp16)[name = string("input_33_cast_fp16")];
            tensor<fp16, [4096, 4096]> model_model_layers_14_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4096, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698928960))), lut = tensor<fp16, [512, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711511936))))[name = string("model_model_layers_14_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
            tensor<fp16, [1, 1, 4096]> linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_14_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_33_cast_fp16)[name = string("linear_2_cast_fp16")];
            tensor<fp16, [1, 1, 4096]> hidden_states_21_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = linear_2_cast_fp16)[name = string("hidden_states_21_cast_fp16")];
            tensor<int32, [1]> mean_11_axes_0 = const()[name = string("mean_11_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_11_keep_dims_0 = const()[name = string("mean_11_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 1, 1]> mean_11_cast_fp16 = reduce_mean(axes = mean_11_axes_0, keep_dims = mean_11_keep_dims_0, x = hidden_states_21_cast_fp16)[name = string("mean_11_cast_fp16")];
            tensor<fp16, [1, 1, 4096]> input_35_cast_fp16 = sub(x = hidden_states_21_cast_fp16, y = mean_11_cast_fp16)[name = string("input_35_cast_fp16")];
            tensor<int32, [1]> var_679_axes_0 = const()[name = string("op_679_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [4096]> model_model_layers_14_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711577536)))];
            tensor<fp16, [1, 1, 4096]> var_679_cast_fp16 = layer_norm(axes = var_679_axes_0, epsilon = var_36_to_fp16, gamma = model_model_layers_14_post_attention_layernorm_weight_to_fp16, x = input_35_cast_fp16)[name = string("op_679_cast_fp16")];
            tensor<int32, [3]> var_686 = const()[name = string("op_686"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 4096, 1]> var_687 = transpose(perm = var_686, x = var_679_cast_fp16)[name = string("transpose_5")];
            tensor<fp16, [1, 4096, 1, 1]> input_37 = expand_dims(axes = input_37_axes_0, x = var_687)[name = string("input_37")];
            string input_39_pad_type_0 = const()[name = string("input_39_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> input_39_strides_0 = const()[name = string("input_39_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> input_39_pad_0 = const()[name = string("input_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> input_39_dilations_0 = const()[name = string("input_39_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 input_39_groups_0 = const()[name = string("input_39_groups_0"), val = int32(1)];
            tensor<fp16, [1, 14336, 1, 1]> input_39 = conv(dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = model_model_layers_14_mlp_gate_proj_weight_palettized, x = input_37)[name = string("input_39")];
            string up_states_5_pad_type_0 = const()[name = string("up_states_5_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> up_states_5_strides_0 = const()[name = string("up_states_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> up_states_5_pad_0 = const()[name = string("up_states_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> up_states_5_dilations_0 = const()[name = string("up_states_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 up_states_5_groups_0 = const()[name = string("up_states_5_groups_0"), val = int32(1)];
            tensor<fp16, [1, 14336, 1, 1]> up_states_5 = conv(dilations = up_states_5_dilations_0, groups = up_states_5_groups_0, pad = up_states_5_pad_0, pad_type = up_states_5_pad_type_0, strides = up_states_5_strides_0, weight = model_model_layers_14_mlp_up_proj_weight_palettized, x = input_37)[name = string("up_states_5")];
            tensor<fp16, [1, 14336, 1, 1]> gate_states_5 = silu(x = input_39)[name = string("gate_states_5")];
            tensor<fp16, [1, 14336, 1, 1]> input_41 = mul(x = gate_states_5, y = up_states_5)[name = string("input_41")];
            string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)];
            tensor<fp16, [1, 4096, 1, 1]> hidden_states_23 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = model_model_layers_14_mlp_down_proj_weight_palettized, x = input_41)[name = string("hidden_states_23")];
            tensor<int32, [1]> var_709_axes_0 = const()[name = string("op_709_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 4096, 1]> var_709 = squeeze(axes = var_709_axes_0, x = hidden_states_23)[name = string("op_709")];
            tensor<int32, [3]> var_710 = const()[name = string("op_710"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 4096]> var_711 = transpose(perm = var_710, x = var_709)[name = string("transpose_4")];
            tensor<fp16, [1, 1, 4096]> hidden_states_25_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = var_711)[name = string("hidden_states_25_cast_fp16")];
            tensor<int32, [1]> mean_13_axes_0 = const()[name = string("mean_13_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_13_keep_dims_0 = const()[name = string("mean_13_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 1, 1]> mean_13_cast_fp16 = reduce_mean(axes = mean_13_axes_0, keep_dims = mean_13_keep_dims_0, x = hidden_states_25_cast_fp16)[name = string("mean_13_cast_fp16")];
            tensor<fp16, [1, 1, 4096]> input_43_cast_fp16 = sub(x = hidden_states_25_cast_fp16, y = mean_13_cast_fp16)[name = string("input_43_cast_fp16")];
            tensor<int32, [1]> var_719_axes_0 = const()[name = string("op_719_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [4096]> model_model_layers_15_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_input_layernorm_weight_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711585792)))];
            tensor<fp16, [1, 1, 4096]> var_719_cast_fp16 = layer_norm(axes = var_719_axes_0, epsilon = var_36_to_fp16, gamma = model_model_layers_15_input_layernorm_weight_to_fp16, x = input_43_cast_fp16)[name = string("op_719_cast_fp16")];
            tensor<int32, [3]> var_722 = const()[name = string("op_722"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> var_724_axes_0 = const()[name = string("op_724_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 4096, 1]> var_723 = transpose(perm = var_722, x = var_719_cast_fp16)[name = string("transpose_3")];
            tensor<fp16, [1, 4096, 1, 1]> var_724 = expand_dims(axes = var_724_axes_0, x = var_723)[name = string("op_724")];
            string var_731_pad_type_0 = const()[name = string("op_731_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> var_731_strides_0 = const()[name = string("op_731_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> var_731_pad_0 = const()[name = string("op_731_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> var_731_dilations_0 = const()[name = string("op_731_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 var_731_groups_0 = const()[name = string("op_731_groups_0"), val = int32(1)];
            tensor<fp16, [1, 4096, 1, 1]> var_731 = conv(dilations = var_731_dilations_0, groups = var_731_groups_0, pad = var_731_pad_0, pad_type = var_731_pad_type_0, strides = var_731_strides_0, weight = model_model_layers_15_self_attn_q_proj_weight_palettized, x = var_724)[name = string("op_731")];
            tensor<int32, [4]> var_732 = const()[name = string("op_732"), val = tensor<int32, [4]>([1, 32, 1, 128])];
            tensor<fp16, [1, 32, 1, 128]> var_733 = reshape(shape = var_732, x = var_731)[name = string("op_733")];
            string var_740_pad_type_0 = const()[name = string("op_740_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> var_740_strides_0 = const()[name = string("op_740_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> var_740_pad_0 = const()[name = string("op_740_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> var_740_dilations_0 = const()[name = string("op_740_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 var_740_groups_0 = const()[name = string("op_740_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 1]> var_740 = conv(dilations = var_740_dilations_0, groups = var_740_groups_0, pad = var_740_pad_0, pad_type = var_740_pad_type_0, strides = var_740_strides_0, weight = model_model_layers_15_self_attn_k_proj_weight_palettized, x = var_724)[name = string("op_740")];
            tensor<int32, [4]> var_741 = const()[name = string("op_741"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<fp16, [1, 8, 1, 128]> var_742 = reshape(shape = var_741, x = var_740)[name = string("op_742")];
            string var_749_pad_type_0 = const()[name = string("op_749_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> var_749_strides_0 = const()[name = string("op_749_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> var_749_pad_0 = const()[name = string("op_749_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> var_749_dilations_0 = const()[name = string("op_749_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 var_749_groups_0 = const()[name = string("op_749_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 1]> var_749 = conv(dilations = var_749_dilations_0, groups = var_749_groups_0, pad = var_749_pad_0, pad_type = var_749_pad_type_0, strides = var_749_strides_0, weight = model_model_layers_15_self_attn_v_proj_weight_palettized, x = var_724)[name = string("op_749")];
            tensor<int32, [4]> var_750 = const()[name = string("op_750"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<fp16, [1, 8, 1, 128]> var_751 = reshape(shape = var_750, x = var_749)[name = string("op_751")];
            tensor<int32, [4]> x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor<int32, [4]>([1, 32, 1, 64])];
            tensor<bool, [4]> x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 32, 1, 64]> x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = var_733)[name = string("x1_13")];
            tensor<int32, [4]> x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor<int32, [4]>([1, 32, 1, 128])];
            tensor<bool, [4]> x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 32, 1, 64]> x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = var_733)[name = string("x2_13")];
            tensor<fp16, [1, 32, 1, 64]> var_765_cast_fp16 = mul(x = x1_13, y = cos_3_cast_fp16)[name = string("op_765_cast_fp16")];
            tensor<fp16, [1, 32, 1, 64]> var_766_cast_fp16 = mul(x = x2_13, y = sin_3_cast_fp16)[name = string("op_766_cast_fp16")];
            tensor<fp16, [1, 32, 1, 64]> var_767_cast_fp16 = sub(x = var_765_cast_fp16, y = var_766_cast_fp16)[name = string("op_767_cast_fp16")];
            tensor<fp16, [1, 32, 1, 64]> var_768_cast_fp16 = mul(x = x2_13, y = cos_3_cast_fp16)[name = string("op_768_cast_fp16")];
            tensor<fp16, [1, 32, 1, 64]> var_769_cast_fp16 = mul(x = x1_13, y = sin_3_cast_fp16)[name = string("op_769_cast_fp16")];
            tensor<fp16, [1, 32, 1, 64]> var_770_cast_fp16 = add(x = var_768_cast_fp16, y = var_769_cast_fp16)[name = string("op_770_cast_fp16")];
            bool rotated_13_interleave_0 = const()[name = string("rotated_13_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 32, 1, 128]> rotated_13_cast_fp16 = concat(axis = var_41, interleave = rotated_13_interleave_0, values = (var_767_cast_fp16, var_770_cast_fp16))[name = string("rotated_13_cast_fp16")];
            tensor<int32, [4]> x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_end_0 = const()[name = string("x1_end_0"), val = tensor<int32, [4]>([1, 8, 1, 64])];
            tensor<bool, [4]> x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 8, 1, 64]> x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = var_742)[name = string("x1")];
            tensor<int32, [4]> x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_end_0 = const()[name = string("x2_end_0"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<bool, [4]> x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 8, 1, 64]> x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = var_742)[name = string("x2")];
            tensor<fp16, [1, 8, 1, 64]> var_786_cast_fp16 = mul(x = x1, y = cos_3_cast_fp16)[name = string("op_786_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_787_cast_fp16 = mul(x = x2, y = sin_3_cast_fp16)[name = string("op_787_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_788_cast_fp16 = sub(x = var_786_cast_fp16, y = var_787_cast_fp16)[name = string("op_788_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_789_cast_fp16 = mul(x = x2, y = cos_3_cast_fp16)[name = string("op_789_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_790_cast_fp16 = mul(x = x1, y = sin_3_cast_fp16)[name = string("op_790_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_791_cast_fp16 = add(x = var_789_cast_fp16, y = var_790_cast_fp16)[name = string("op_791_cast_fp16")];
            bool rotated_interleave_0 = const()[name = string("rotated_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 8, 1, 128]> rotated_cast_fp16 = concat(axis = var_41, interleave = rotated_interleave_0, values = (var_788_cast_fp16, var_791_cast_fp16))[name = string("rotated_cast_fp16")];
            tensor<int32, [1]> expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor<int32, [1]>([15])];
            tensor<int32, [1]> expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor<int32, [1]>([16])];
            int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)];
            bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_26")];
            tensor<int32, [1]> concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)];
            bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (expand_dims_40, concat_27_values1_0, var_241, concat_27_values3_0))[name = string("concat_27")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [64, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_26, begin_mask = model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_27, end_mask = model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_7_stride_0, update = rotated_cast_fp16, x = coreml_update_state_13)[name = string("model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_6_write_state")];
            tensor<fp16, [64, 8, 1024, 128]> coreml_update_state_14 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_6")];
            tensor<int32, [1]> expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor<int32, [1]>([47])];
            tensor<int32, [1]> expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor<int32, [1]>([48])];
            int32 concat_30_axis_0 = const()[name = string("concat_30_axis_0"), val = int32(0)];
            bool concat_30_interleave_0 = const()[name = string("concat_30_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_30 = concat(axis = concat_30_axis_0, interleave = concat_30_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_30")];
            tensor<int32, [1]> concat_31_values1_0 = const()[name = string("concat_31_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_31_values3_0 = const()[name = string("concat_31_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_31_axis_0 = const()[name = string("concat_31_axis_0"), val = int32(0)];
            bool concat_31_interleave_0 = const()[name = string("concat_31_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_31 = concat(axis = concat_31_axis_0, interleave = concat_31_interleave_0, values = (expand_dims_46, concat_31_values1_0, var_241, concat_31_values3_0))[name = string("concat_31")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [64, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_30, begin_mask = model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_31, end_mask = model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_8_stride_0, update = var_751, x = coreml_update_state_14)[name = string("model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_7_write_state")];
            tensor<fp16, [64, 8, 1024, 128]> coreml_update_state_15 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_7")];
            tensor<int32, [4]> var_811_begin_0 = const()[name = string("op_811_begin_0"), val = tensor<int32, [4]>([15, 0, 0, 0])];
            tensor<int32, [4]> var_811_end_0 = const()[name = string("op_811_end_0"), val = tensor<int32, [4]>([16, 8, 1024, 128])];
            tensor<bool, [4]> var_811_end_mask_0 = const()[name = string("op_811_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_811_cast_fp16 = slice_by_index(begin = var_811_begin_0, end = var_811_end_0, end_mask = var_811_end_mask_0, x = coreml_update_state_15)[name = string("op_811_cast_fp16")];
            tensor<int32, [1]> K_layer_cache_axes_0 = const()[name = string("K_layer_cache_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> K_layer_cache_cast_fp16 = squeeze(axes = K_layer_cache_axes_0, x = var_811_cast_fp16)[name = string("K_layer_cache_cast_fp16")];
            tensor<int32, [4]> var_813_begin_0 = const()[name = string("op_813_begin_0"), val = tensor<int32, [4]>([47, 0, 0, 0])];
            tensor<int32, [4]> var_813_end_0 = const()[name = string("op_813_end_0"), val = tensor<int32, [4]>([48, 8, 1024, 128])];
            tensor<bool, [4]> var_813_end_mask_0 = const()[name = string("op_813_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_813_cast_fp16 = slice_by_index(begin = var_813_begin_0, end = var_813_end_0, end_mask = var_813_end_mask_0, x = coreml_update_state_15)[name = string("op_813_cast_fp16")];
            tensor<int32, [1]> V_layer_cache_axes_0 = const()[name = string("V_layer_cache_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> V_layer_cache_cast_fp16 = squeeze(axes = V_layer_cache_axes_0, x = var_813_cast_fp16)[name = string("V_layer_cache_cast_fp16")];
            tensor<int32, [1]> x_95_axes_0 = const()[name = string("x_95_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_95_cast_fp16 = expand_dims(axes = x_95_axes_0, x = K_layer_cache_cast_fp16)[name = string("x_95_cast_fp16")];
            tensor<int32, [4]> var_822 = const()[name = string("op_822"), val = tensor<int32, [4]>([1, 4, 1, 1])];
            tensor<fp16, [8, 4, 1024, 128]> x_97_cast_fp16 = tile(reps = var_822, x = x_95_cast_fp16)[name = string("x_97_cast_fp16")];
            tensor<int32, [4]> var_826 = const()[name = string("op_826"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
            tensor<fp16, [1, 32, 1024, 128]> key_states_cast_fp16 = reshape(shape = var_826, x = x_97_cast_fp16)[name = string("key_states_cast_fp16")];
            tensor<int32, [1]> x_101_axes_0 = const()[name = string("x_101_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_101_cast_fp16 = expand_dims(axes = x_101_axes_0, x = V_layer_cache_cast_fp16)[name = string("x_101_cast_fp16")];
            tensor<int32, [4]> var_829 = const()[name = string("op_829"), val = tensor<int32, [4]>([1, 4, 1, 1])];
            tensor<fp16, [8, 4, 1024, 128]> x_103_cast_fp16 = tile(reps = var_829, x = x_101_cast_fp16)[name = string("x_103_cast_fp16")];
            tensor<int32, [4]> var_833 = const()[name = string("op_833"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
            tensor<fp16, [1, 32, 1024, 128]> value_states_cast_fp16 = reshape(shape = var_833, x = x_103_cast_fp16)[name = string("value_states_cast_fp16")];
            bool var_836_transpose_x_1 = const()[name = string("op_836_transpose_x_1"), val = bool(false)];
            bool var_836_transpose_y_1 = const()[name = string("op_836_transpose_y_1"), val = bool(true)];
            tensor<fp16, [1, 32, 1, 1024]> var_836_cast_fp16 = matmul(transpose_x = var_836_transpose_x_1, transpose_y = var_836_transpose_y_1, x = rotated_13_cast_fp16, y = key_states_cast_fp16)[name = string("op_836_cast_fp16")];
            fp16 var_837_to_fp16 = const()[name = string("op_837_to_fp16"), val = fp16(0x1.6ap-4)];
            tensor<fp16, [1, 32, 1, 1024]> attn_weights_13_cast_fp16 = mul(x = var_836_cast_fp16, y = var_837_to_fp16)[name = string("attn_weights_13_cast_fp16")];
            tensor<fp16, [1, 32, 1, 1024]> x_105_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask)[name = string("x_105_cast_fp16")];
            tensor<int32, [1]> reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor<int32, [1]>([-1])];
            bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 32, 1, 1]> reduce_max_3_cast_fp16 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_105_cast_fp16)[name = string("reduce_max_3_cast_fp16")];
            tensor<fp16, [1, 32, 1, 1024]> x_107_cast_fp16 = sub(x = x_105_cast_fp16, y = reduce_max_3_cast_fp16)[name = string("x_107_cast_fp16")];
            tensor<fp16, [1, 32, 1, 1024]> exp_x_cast_fp16 = exp(x = x_107_cast_fp16)[name = string("exp_x_cast_fp16")];
            tensor<int32, [1]> var_848_axes_0 = const()[name = string("op_848_axes_0"), val = tensor<int32, [1]>([-1])];
            bool var_848_keep_dims_0 = const()[name = string("op_848_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 32, 1, 1]> var_848_cast_fp16 = reduce_sum(axes = var_848_axes_0, keep_dims = var_848_keep_dims_0, x = exp_x_cast_fp16)[name = string("op_848_cast_fp16")];
            tensor<fp16, [1, 32, 1, 1024]> attn_weights_cast_fp16 = real_div(x = exp_x_cast_fp16, y = var_848_cast_fp16)[name = string("attn_weights_cast_fp16")];
            bool attn_output_19_transpose_x_0 = const()[name = string("attn_output_19_transpose_x_0"), val = bool(false)];
            bool attn_output_19_transpose_y_0 = const()[name = string("attn_output_19_transpose_y_0"), val = bool(false)];
            tensor<fp16, [1, 32, 1, 128]> attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_0, transpose_y = attn_output_19_transpose_y_0, x = attn_weights_cast_fp16, y = value_states_cast_fp16)[name = string("attn_output_19_cast_fp16")];
            tensor<int32, [4]> var_851_perm_0 = const()[name = string("op_851_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_853 = const()[name = string("op_853"), val = tensor<int32, [3]>([1, 1, 4096])];
            tensor<fp16, [1, 1, 32, 128]> var_851_cast_fp16 = transpose(perm = var_851_perm_0, x = attn_output_19_cast_fp16)[name = string("transpose_2")];
            tensor<fp16, [1, 1, 4096]> input_47_cast_fp16 = reshape(shape = var_853, x = var_851_cast_fp16)[name = string("input_47_cast_fp16")];
            tensor<fp16, [4096, 4096]> model_model_layers_15_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4096, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711594048))), lut = tensor<fp16, [512, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(724177024))))[name = string("model_model_layers_15_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
            tensor<fp16, [1, 1, 4096]> linear_3_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_15_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_47_cast_fp16)[name = string("linear_3_cast_fp16")];
            tensor<fp16, [1, 1, 4096]> hidden_states_29_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = linear_3_cast_fp16)[name = string("hidden_states_29_cast_fp16")];
            tensor<int32, [1]> mean_axes_0 = const()[name = string("mean_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_keep_dims_0 = const()[name = string("mean_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 1, 1]> mean_cast_fp16 = reduce_mean(axes = mean_axes_0, keep_dims = mean_keep_dims_0, x = hidden_states_29_cast_fp16)[name = string("mean_cast_fp16")];
            tensor<fp16, [1, 1, 4096]> input_49_cast_fp16 = sub(x = hidden_states_29_cast_fp16, y = mean_cast_fp16)[name = string("input_49_cast_fp16")];
            tensor<int32, [1]> var_864_axes_0 = const()[name = string("op_864_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [4096]> model_model_layers_15_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(724242624)))];
            tensor<fp16, [1, 1, 4096]> var_864_cast_fp16 = layer_norm(axes = var_864_axes_0, epsilon = var_36_to_fp16, gamma = model_model_layers_15_post_attention_layernorm_weight_to_fp16, x = input_49_cast_fp16)[name = string("op_864_cast_fp16")];
            tensor<int32, [3]> var_871 = const()[name = string("op_871"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> input_51_axes_0 = const()[name = string("input_51_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 4096, 1]> var_872 = transpose(perm = var_871, x = var_864_cast_fp16)[name = string("transpose_1")];
            tensor<fp16, [1, 4096, 1, 1]> input_51 = expand_dims(axes = input_51_axes_0, x = var_872)[name = string("input_51")];
            string input_53_pad_type_0 = const()[name = string("input_53_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> input_53_strides_0 = const()[name = string("input_53_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> input_53_pad_0 = const()[name = string("input_53_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> input_53_dilations_0 = const()[name = string("input_53_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 input_53_groups_0 = const()[name = string("input_53_groups_0"), val = int32(1)];
            tensor<fp16, [1, 14336, 1, 1]> input_53 = conv(dilations = input_53_dilations_0, groups = input_53_groups_0, pad = input_53_pad_0, pad_type = input_53_pad_type_0, strides = input_53_strides_0, weight = model_model_layers_15_mlp_gate_proj_weight_palettized, x = input_51)[name = string("input_53")];
            string up_states_pad_type_0 = const()[name = string("up_states_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> up_states_strides_0 = const()[name = string("up_states_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> up_states_pad_0 = const()[name = string("up_states_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> up_states_dilations_0 = const()[name = string("up_states_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 up_states_groups_0 = const()[name = string("up_states_groups_0"), val = int32(1)];
            tensor<fp16, [1, 14336, 1, 1]> up_states = conv(dilations = up_states_dilations_0, groups = up_states_groups_0, pad = up_states_pad_0, pad_type = up_states_pad_type_0, strides = up_states_strides_0, weight = model_model_layers_15_mlp_up_proj_weight_palettized, x = input_51)[name = string("up_states")];
            tensor<fp16, [1, 14336, 1, 1]> gate_states = silu(x = input_53)[name = string("gate_states")];
            tensor<fp16, [1, 14336, 1, 1]> input = mul(x = gate_states, y = up_states)[name = string("input")];
            string hidden_states_pad_type_0 = const()[name = string("hidden_states_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> hidden_states_strides_0 = const()[name = string("hidden_states_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> hidden_states_pad_0 = const()[name = string("hidden_states_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> hidden_states_dilations_0 = const()[name = string("hidden_states_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 hidden_states_groups_0 = const()[name = string("hidden_states_groups_0"), val = int32(1)];
            tensor<fp16, [1, 4096, 1, 1]> hidden_states_1 = conv(dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = model_model_layers_15_mlp_down_proj_weight_palettized, x = input)[name = string("hidden_states")];
            tensor<int32, [1]> var_894_axes_0 = const()[name = string("op_894_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 4096, 1]> var_894 = squeeze(axes = var_894_axes_0, x = hidden_states_1)[name = string("op_894")];
            tensor<int32, [3]> var_895 = const()[name = string("op_895"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 4096]> var_896 = transpose(perm = var_895, x = var_894)[name = string("transpose_0")];
            tensor<fp16, [1, 1, 4096]> output_hidden_states = add(x = hidden_states_29_cast_fp16, y = var_896)[name = string("op_897_cast_fp16")];
            tensor<int32, [1]> position_ids_tmp = identity(x = position_ids)[name = string("position_ids_tmp")];
        } -> (output_hidden_states);
    func prefill<ios18>(tensor<fp16, [1, 1, 256, 1024]> causal_mask, tensor<int32, [1]> current_pos, tensor<fp16, [1, 256, 4096]> hidden_states, state<tensor<fp16, [64, 8, 1024, 128]>> model_model_kv_cache_0, tensor<int32, [256]> position_ids) {
            tensor<fp16, [4096, 4096, 1, 1]> model_model_layers_12_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4096, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor<fp16, [512, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12583040))))[name = string("model_model_layers_12_self_attn_q_proj_weight_palettized")];
            tensor<fp16, [1024, 4096, 1, 1]> model_model_layers_12_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [1024, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12648640))), lut = tensor<fp16, [128, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15794432))))[name = string("model_model_layers_12_self_attn_k_proj_weight_palettized")];
            tensor<fp16, [1024, 4096, 1, 1]> model_model_layers_12_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [1024, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15810880))), lut = tensor<fp16, [128, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18956672))))[name = string("model_model_layers_12_self_attn_v_proj_weight_palettized")];
            tensor<fp16, [14336, 4096, 1, 1]> model_model_layers_12_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [14336, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18973120))), lut = tensor<fp16, [1792, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63013376))))[name = string("model_model_layers_12_mlp_gate_proj_weight_palettized")];
            tensor<fp16, [14336, 4096, 1, 1]> model_model_layers_12_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [14336, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63242816))), lut = tensor<fp16, [1792, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107283072))))[name = string("model_model_layers_12_mlp_up_proj_weight_palettized")];
            tensor<fp16, [4096, 14336, 1, 1]> model_model_layers_12_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4096, 14336, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107512512))), lut = tensor<fp16, [512, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151552768))))[name = string("model_model_layers_12_mlp_down_proj_weight_palettized")];
            tensor<fp16, [4096, 4096, 1, 1]> model_model_layers_13_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4096, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151618368))), lut = tensor<fp16, [512, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164201344))))[name = string("model_model_layers_13_self_attn_q_proj_weight_palettized")];
            tensor<fp16, [1024, 4096, 1, 1]> model_model_layers_13_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [1024, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164266944))), lut = tensor<fp16, [128, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167412736))))[name = string("model_model_layers_13_self_attn_k_proj_weight_palettized")];
            tensor<fp16, [1024, 4096, 1, 1]> model_model_layers_13_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [1024, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167429184))), lut = tensor<fp16, [128, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170574976))))[name = string("model_model_layers_13_self_attn_v_proj_weight_palettized")];
            tensor<fp16, [14336, 4096, 1, 1]> model_model_layers_13_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [14336, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170591424))), lut = tensor<fp16, [1792, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214631680))))[name = string("model_model_layers_13_mlp_gate_proj_weight_palettized")];
            tensor<fp16, [14336, 4096, 1, 1]> model_model_layers_13_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [14336, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214861120))), lut = tensor<fp16, [1792, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(258901376))))[name = string("model_model_layers_13_mlp_up_proj_weight_palettized")];
            tensor<fp16, [4096, 14336, 1, 1]> model_model_layers_13_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4096, 14336, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259130816))), lut = tensor<fp16, [512, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303171072))))[name = string("model_model_layers_13_mlp_down_proj_weight_palettized")];
            tensor<fp16, [4096, 4096, 1, 1]> model_model_layers_14_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4096, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303236672))), lut = tensor<fp16, [512, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315819648))))[name = string("model_model_layers_14_self_attn_q_proj_weight_palettized")];
            tensor<fp16, [1024, 4096, 1, 1]> model_model_layers_14_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [1024, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315885248))), lut = tensor<fp16, [128, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319031040))))[name = string("model_model_layers_14_self_attn_k_proj_weight_palettized")];
            tensor<fp16, [1024, 4096, 1, 1]> model_model_layers_14_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [1024, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319047488))), lut = tensor<fp16, [128, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322193280))))[name = string("model_model_layers_14_self_attn_v_proj_weight_palettized")];
            tensor<fp16, [14336, 4096, 1, 1]> model_model_layers_14_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [14336, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322209728))), lut = tensor<fp16, [1792, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366249984))))[name = string("model_model_layers_14_mlp_gate_proj_weight_palettized")];
            tensor<fp16, [14336, 4096, 1, 1]> model_model_layers_14_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [14336, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366479424))), lut = tensor<fp16, [1792, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410519680))))[name = string("model_model_layers_14_mlp_up_proj_weight_palettized")];
            tensor<fp16, [4096, 14336, 1, 1]> model_model_layers_14_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4096, 14336, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410749120))), lut = tensor<fp16, [512, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454789376))))[name = string("model_model_layers_14_mlp_down_proj_weight_palettized")];
            tensor<fp16, [4096, 4096, 1, 1]> model_model_layers_15_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4096, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454854976))), lut = tensor<fp16, [512, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(467437952))))[name = string("model_model_layers_15_self_attn_q_proj_weight_palettized")];
            tensor<fp16, [1024, 4096, 1, 1]> model_model_layers_15_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [1024, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(467503552))), lut = tensor<fp16, [128, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470649344))))[name = string("model_model_layers_15_self_attn_k_proj_weight_palettized")];
            tensor<fp16, [1024, 4096, 1, 1]> model_model_layers_15_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [1024, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470665792))), lut = tensor<fp16, [128, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473811584))))[name = string("model_model_layers_15_self_attn_v_proj_weight_palettized")];
            tensor<fp16, [14336, 4096, 1, 1]> model_model_layers_15_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [14336, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473828032))), lut = tensor<fp16, [1792, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(517868288))))[name = string("model_model_layers_15_mlp_gate_proj_weight_palettized")];
            tensor<fp16, [14336, 4096, 1, 1]> model_model_layers_15_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [14336, 4096, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518097728))), lut = tensor<fp16, [1792, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562137984))))[name = string("model_model_layers_15_mlp_up_proj_weight_palettized")];
            tensor<fp16, [4096, 14336, 1, 1]> model_model_layers_15_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4096, 14336, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562367424))), lut = tensor<fp16, [512, 1, 1, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(606407680))))[name = string("model_model_layers_15_mlp_down_proj_weight_palettized")];
            int32 var_36 = const()[name = string("op_36"), val = int32(-1)];
            int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)];
            tensor<bool, [256]> greater_equal_0 = greater_equal(x = position_ids, y = greater_equal_0_y_0)[name = string("greater_equal_0")];
            int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(131072)];
            tensor<int32, [256]> add_0 = add(x = position_ids, y = slice_by_index_0)[name = string("add_0")];
            tensor<int32, [256]> select_0 = select(a = position_ids, b = add_0, cond = greater_equal_0)[name = string("select_0")];
            int32 var_153_axis_0 = const()[name = string("op_153_axis_0"), val = int32(1)];
            int32 var_153_batch_dims_0 = const()[name = string("op_153_batch_dims_0"), val = int32(0)];
            bool var_153_validate_indices_0 = const()[name = string("op_153_validate_indices_0"), val = bool(false)];
            tensor<fp16, [1, 131072, 128]> var_47_to_fp16 = const()[name = string("op_47_to_fp16"), val = tensor<fp16, [1, 131072, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(640027776)))];
            tensor<fp16, [1, 256, 128]> var_153_cast_fp16 = gather(axis = var_153_axis_0, batch_dims = var_153_batch_dims_0, indices = select_0, validate_indices = var_153_validate_indices_0, x = var_47_to_fp16)[name = string("op_153_cast_fp16")];
            tensor<int32, [4]> var_154 = const()[name = string("op_154"), val = tensor<int32, [4]>([1, 256, 1, 128])];
            tensor<fp16, [1, 256, 1, 128]> cos_1_cast_fp16 = reshape(shape = var_154, x = var_153_cast_fp16)[name = string("cos_1_cast_fp16")];
            int32 var_158_axis_0 = const()[name = string("op_158_axis_0"), val = int32(1)];
            int32 var_158_batch_dims_0 = const()[name = string("op_158_batch_dims_0"), val = int32(0)];
            bool var_158_validate_indices_0 = const()[name = string("op_158_validate_indices_0"), val = bool(false)];
            tensor<fp16, [1, 131072, 128]> var_42_to_fp16 = const()[name = string("op_42_to_fp16"), val = tensor<fp16, [1, 131072, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(606473280)))];
            tensor<fp16, [1, 256, 128]> var_158_cast_fp16 = gather(axis = var_158_axis_0, batch_dims = var_158_batch_dims_0, indices = select_0, validate_indices = var_158_validate_indices_0, x = var_42_to_fp16)[name = string("op_158_cast_fp16")];
            tensor<int32, [4]> var_159 = const()[name = string("op_159"), val = tensor<int32, [4]>([1, 256, 1, 128])];
            tensor<fp16, [1, 256, 1, 128]> sin_1_cast_fp16 = reshape(shape = var_159, x = var_158_cast_fp16)[name = string("sin_1_cast_fp16")];
            tensor<int32, [1]> mean_1_axes_0 = const()[name = string("mean_1_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_1_keep_dims_0 = const()[name = string("mean_1_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 256, 1]> mean_1_cast_fp16 = reduce_mean(axes = mean_1_axes_0, keep_dims = mean_1_keep_dims_0, x = hidden_states)[name = string("mean_1_cast_fp16")];
            tensor<fp16, [1, 256, 4096]> input_1_cast_fp16 = sub(x = hidden_states, y = mean_1_cast_fp16)[name = string("input_1_cast_fp16")];
            tensor<int32, [1]> var_169_axes_0 = const()[name = string("op_169_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [4096]> model_model_layers_12_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_input_layernorm_weight_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(673582272)))];
            fp16 var_38_to_fp16 = const()[name = string("op_38_to_fp16"), val = fp16(0x1.5p-17)];
            tensor<fp16, [1, 256, 4096]> var_169_cast_fp16 = layer_norm(axes = var_169_axes_0, epsilon = var_38_to_fp16, gamma = model_model_layers_12_input_layernorm_weight_to_fp16, x = input_1_cast_fp16)[name = string("op_169_cast_fp16")];
            tensor<int32, [3]> var_173 = const()[name = string("op_173"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> var_175_axes_0 = const()[name = string("op_175_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 4096, 256]> var_174 = transpose(perm = var_173, x = var_169_cast_fp16)[name = string("transpose_29")];
            tensor<fp16, [1, 4096, 1, 256]> var_175 = expand_dims(axes = var_175_axes_0, x = var_174)[name = string("op_175")];
            string query_states_1_pad_type_0 = const()[name = string("query_states_1_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> query_states_1_strides_0 = const()[name = string("query_states_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> query_states_1_pad_0 = const()[name = string("query_states_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> query_states_1_dilations_0 = const()[name = string("query_states_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 query_states_1_groups_0 = const()[name = string("query_states_1_groups_0"), val = int32(1)];
            tensor<fp16, [1, 4096, 1, 256]> query_states_1 = conv(dilations = query_states_1_dilations_0, groups = query_states_1_groups_0, pad = query_states_1_pad_0, pad_type = query_states_1_pad_type_0, strides = query_states_1_strides_0, weight = model_model_layers_12_self_attn_q_proj_weight_palettized, x = var_175)[name = string("query_states_1")];
            string key_states_1_pad_type_0 = const()[name = string("key_states_1_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> key_states_1_strides_0 = const()[name = string("key_states_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> key_states_1_pad_0 = const()[name = string("key_states_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> key_states_1_dilations_0 = const()[name = string("key_states_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 key_states_1_groups_0 = const()[name = string("key_states_1_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 256]> key_states_1 = conv(dilations = key_states_1_dilations_0, groups = key_states_1_groups_0, pad = key_states_1_pad_0, pad_type = key_states_1_pad_type_0, strides = key_states_1_strides_0, weight = model_model_layers_12_self_attn_k_proj_weight_palettized, x = var_175)[name = string("key_states_1")];
            string value_states_1_pad_type_0 = const()[name = string("value_states_1_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> value_states_1_strides_0 = const()[name = string("value_states_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> value_states_1_pad_0 = const()[name = string("value_states_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> value_states_1_dilations_0 = const()[name = string("value_states_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 value_states_1_groups_0 = const()[name = string("value_states_1_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 256]> value_states_1 = conv(dilations = value_states_1_dilations_0, groups = value_states_1_groups_0, pad = value_states_1_pad_0, pad_type = value_states_1_pad_type_0, strides = value_states_1_strides_0, weight = model_model_layers_12_self_attn_v_proj_weight_palettized, x = var_175)[name = string("value_states_1")];
            tensor<int32, [4]> var_195 = const()[name = string("op_195"), val = tensor<int32, [4]>([1, 32, 128, 256])];
            tensor<fp16, [1, 32, 128, 256]> var_196 = reshape(shape = var_195, x = query_states_1)[name = string("op_196")];
            tensor<int32, [4]> var_197 = const()[name = string("op_197"), val = tensor<int32, [4]>([0, 1, 3, 2])];
            tensor<int32, [4]> var_199 = const()[name = string("op_199"), val = tensor<int32, [4]>([1, 8, 128, 256])];
            tensor<fp16, [1, 8, 128, 256]> var_200 = reshape(shape = var_199, x = key_states_1)[name = string("op_200")];
            tensor<int32, [4]> var_201 = const()[name = string("op_201"), val = tensor<int32, [4]>([0, 1, 3, 2])];
            tensor<int32, [4]> var_203 = const()[name = string("op_203"), val = tensor<int32, [4]>([1, 8, 128, 256])];
            tensor<fp16, [1, 8, 128, 256]> var_204 = reshape(shape = var_203, x = value_states_1)[name = string("op_204")];
            tensor<int32, [4]> var_205 = const()[name = string("op_205"), val = tensor<int32, [4]>([0, 1, 3, 2])];
            tensor<int32, [4]> var_207 = const()[name = string("op_207"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [4]> var_209 = const()[name = string("op_209"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [4]> x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor<int32, [4]>([1, 32, 256, 64])];
            tensor<bool, [4]> x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 32, 256, 128]> x_1 = transpose(perm = var_197, x = var_196)[name = string("transpose_28")];
            tensor<fp16, [1, 32, 256, 64]> x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = x_1)[name = string("x1_1")];
            tensor<int32, [4]> x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor<int32, [4]>([1, 32, 256, 128])];
            tensor<bool, [4]> x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 32, 256, 64]> x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = x_1)[name = string("x2_1")];
            tensor<int32, [4]> cos_7_begin_0 = const()[name = string("cos_7_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> cos_7_end_0 = const()[name = string("cos_7_end_0"), val = tensor<int32, [4]>([1, 1, 256, 64])];
            tensor<bool, [4]> cos_7_end_mask_0 = const()[name = string("cos_7_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 1, 256, 128]> cos_5 = transpose(perm = var_207, x = cos_1_cast_fp16)[name = string("transpose_27")];
            tensor<fp16, [1, 1, 256, 64]> cos_7 = slice_by_index(begin = cos_7_begin_0, end = cos_7_end_0, end_mask = cos_7_end_mask_0, x = cos_5)[name = string("cos_7")];
            tensor<int32, [4]> sin_7_begin_0 = const()[name = string("sin_7_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> sin_7_end_0 = const()[name = string("sin_7_end_0"), val = tensor<int32, [4]>([1, 1, 256, 64])];
            tensor<bool, [4]> sin_7_end_mask_0 = const()[name = string("sin_7_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 1, 256, 128]> sin_5 = transpose(perm = var_209, x = sin_1_cast_fp16)[name = string("transpose_26")];
            tensor<fp16, [1, 1, 256, 64]> sin_7 = slice_by_index(begin = sin_7_begin_0, end = sin_7_end_0, end_mask = sin_7_end_mask_0, x = sin_5)[name = string("sin_7")];
            tensor<fp16, [1, 32, 256, 64]> var_223 = mul(x = x1_1, y = cos_7)[name = string("op_223")];
            tensor<fp16, [1, 32, 256, 64]> var_224 = mul(x = x2_1, y = sin_7)[name = string("op_224")];
            tensor<fp16, [1, 32, 256, 64]> var_225 = sub(x = var_223, y = var_224)[name = string("op_225")];
            tensor<fp16, [1, 32, 256, 64]> var_226 = mul(x = x2_1, y = cos_7)[name = string("op_226")];
            tensor<fp16, [1, 32, 256, 64]> var_227 = mul(x = x1_1, y = sin_7)[name = string("op_227")];
            tensor<fp16, [1, 32, 256, 64]> var_228 = add(x = var_226, y = var_227)[name = string("op_228")];
            bool rotated_1_interleave_0 = const()[name = string("rotated_1_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 32, 256, 128]> rotated_1 = concat(axis = var_36, interleave = rotated_1_interleave_0, values = (var_225, var_228))[name = string("rotated_1")];
            tensor<int32, [4]> x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor<int32, [4]>([1, 8, 256, 64])];
            tensor<bool, [4]> x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 8, 256, 128]> x_5 = transpose(perm = var_201, x = var_200)[name = string("transpose_25")];
            tensor<fp16, [1, 8, 256, 64]> x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = x_5)[name = string("x1_3")];
            tensor<int32, [4]> x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor<int32, [4]>([1, 8, 256, 128])];
            tensor<bool, [4]> x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 8, 256, 64]> x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = x_5)[name = string("x2_3")];
            tensor<fp16, [1, 8, 256, 64]> var_244 = mul(x = x1_3, y = cos_7)[name = string("op_244")];
            tensor<fp16, [1, 8, 256, 64]> var_245 = mul(x = x2_3, y = sin_7)[name = string("op_245")];
            tensor<fp16, [1, 8, 256, 64]> var_246 = sub(x = var_244, y = var_245)[name = string("op_246")];
            tensor<fp16, [1, 8, 256, 64]> var_247 = mul(x = x2_3, y = cos_7)[name = string("op_247")];
            tensor<fp16, [1, 8, 256, 64]> var_248 = mul(x = x1_3, y = sin_7)[name = string("op_248")];
            tensor<fp16, [1, 8, 256, 64]> var_249 = add(x = var_247, y = var_248)[name = string("op_249")];
            bool rotated_3_interleave_0 = const()[name = string("rotated_3_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 8, 256, 128]> rotated_3 = concat(axis = var_36, interleave = rotated_3_interleave_0, values = (var_246, var_249))[name = string("rotated_3")];
            tensor<int32, [1]> seq_length_1 = const()[name = string("seq_length_1"), val = tensor<int32, [1]>([256])];
            tensor<int32, [1]> var_258 = add(x = current_pos, y = seq_length_1)[name = string("op_258")];
            tensor<fp16, [64, 8, 1024, 128]> read_state_0 = read_state(input = model_model_kv_cache_0)[name = string("read_state_0")];
            tensor<int32, [1]> expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor<int32, [1]>([12])];
            tensor<int32, [1]> expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor<int32, [1]>([13])];
            int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)];
            bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")];
            tensor<int32, [1]> concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)];
            bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_258, concat_3_values3_0))[name = string("concat_3")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [64, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_1_stride_0, update = rotated_3, x = read_state_0)[name = string("model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_8_write_state")];
            tensor<fp16, [64, 8, 1024, 128]> coreml_update_state_8 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_8")];
            tensor<int32, [1]> expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor<int32, [1]>([44])];
            tensor<int32, [1]> expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor<int32, [1]>([45])];
            int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)];
            bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")];
            tensor<int32, [1]> concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)];
            bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_258, concat_7_values3_0))[name = string("concat_7")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [1, 8, 256, 128]> value_states_3 = transpose(perm = var_205, x = var_204)[name = string("transpose_24")];
            tensor<fp16, [64, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_2_stride_0, update = value_states_3, x = coreml_update_state_8)[name = string("model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_9_write_state")];
            tensor<fp16, [64, 8, 1024, 128]> coreml_update_state_9 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_9")];
            tensor<int32, [4]> var_272_begin_0 = const()[name = string("op_272_begin_0"), val = tensor<int32, [4]>([12, 0, 0, 0])];
            tensor<int32, [4]> var_272_end_0 = const()[name = string("op_272_end_0"), val = tensor<int32, [4]>([13, 8, 1024, 128])];
            tensor<bool, [4]> var_272_end_mask_0 = const()[name = string("op_272_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_272_cast_fp16 = slice_by_index(begin = var_272_begin_0, end = var_272_end_0, end_mask = var_272_end_mask_0, x = coreml_update_state_9)[name = string("op_272_cast_fp16")];
            tensor<int32, [1]> K_layer_cache_1_axes_0 = const()[name = string("K_layer_cache_1_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> K_layer_cache_1_cast_fp16 = squeeze(axes = K_layer_cache_1_axes_0, x = var_272_cast_fp16)[name = string("K_layer_cache_1_cast_fp16")];
            tensor<int32, [4]> var_274_begin_0 = const()[name = string("op_274_begin_0"), val = tensor<int32, [4]>([44, 0, 0, 0])];
            tensor<int32, [4]> var_274_end_0 = const()[name = string("op_274_end_0"), val = tensor<int32, [4]>([45, 8, 1024, 128])];
            tensor<bool, [4]> var_274_end_mask_0 = const()[name = string("op_274_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_274_cast_fp16 = slice_by_index(begin = var_274_begin_0, end = var_274_end_0, end_mask = var_274_end_mask_0, x = coreml_update_state_9)[name = string("op_274_cast_fp16")];
            tensor<int32, [1]> V_layer_cache_1_axes_0 = const()[name = string("V_layer_cache_1_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> V_layer_cache_1_cast_fp16 = squeeze(axes = V_layer_cache_1_axes_0, x = var_274_cast_fp16)[name = string("V_layer_cache_1_cast_fp16")];
            tensor<int32, [1]> x_11_axes_0 = const()[name = string("x_11_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_11_cast_fp16 = expand_dims(axes = x_11_axes_0, x = K_layer_cache_1_cast_fp16)[name = string("x_11_cast_fp16")];
            tensor<int32, [4]> var_283 = const()[name = string("op_283"), val = tensor<int32, [4]>([1, 4, 1, 1])];
            tensor<fp16, [8, 4, 1024, 128]> x_13_cast_fp16 = tile(reps = var_283, x = x_11_cast_fp16)[name = string("x_13_cast_fp16")];
            tensor<int32, [4]> var_287 = const()[name = string("op_287"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
            tensor<fp16, [1, 32, 1024, 128]> var_288_cast_fp16 = reshape(shape = var_287, x = x_13_cast_fp16)[name = string("op_288_cast_fp16")];
            tensor<int32, [1]> x_17_axes_0 = const()[name = string("x_17_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_17_cast_fp16 = expand_dims(axes = x_17_axes_0, x = V_layer_cache_1_cast_fp16)[name = string("x_17_cast_fp16")];
            tensor<int32, [4]> var_290 = const()[name = string("op_290"), val = tensor<int32, [4]>([1, 4, 1, 1])];
            tensor<fp16, [8, 4, 1024, 128]> x_19_cast_fp16 = tile(reps = var_290, x = x_17_cast_fp16)[name = string("x_19_cast_fp16")];
            bool var_297_transpose_x_0 = const()[name = string("op_297_transpose_x_0"), val = bool(false)];
            bool var_297_transpose_y_0 = const()[name = string("op_297_transpose_y_0"), val = bool(true)];
            tensor<fp16, [1, 32, 256, 1024]> var_297_cast_fp16 = matmul(transpose_x = var_297_transpose_x_0, transpose_y = var_297_transpose_y_0, x = rotated_1, y = var_288_cast_fp16)[name = string("op_297_cast_fp16")];
            fp16 var_298_to_fp16 = const()[name = string("op_298_to_fp16"), val = fp16(0x1.6ap-4)];
            tensor<fp16, [1, 32, 256, 1024]> attn_weights_1_cast_fp16 = mul(x = var_297_cast_fp16, y = var_298_to_fp16)[name = string("attn_weights_1_cast_fp16")];
            tensor<fp16, [1, 32, 256, 1024]> x_21_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("x_21_cast_fp16")];
            tensor<int32, [1]> reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor<int32, [1]>([-1])];
            bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 32, 256, 1]> reduce_max_0_cast_fp16 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_21_cast_fp16)[name = string("reduce_max_0_cast_fp16")];
            tensor<fp16, [1, 32, 256, 1024]> x_23_cast_fp16 = sub(x = x_21_cast_fp16, y = reduce_max_0_cast_fp16)[name = string("x_23_cast_fp16")];
            tensor<fp16, [1, 32, 256, 1024]> exp_x_1_cast_fp16 = exp(x = x_23_cast_fp16)[name = string("exp_x_1_cast_fp16")];
            tensor<int32, [1]> var_309_axes_0 = const()[name = string("op_309_axes_0"), val = tensor<int32, [1]>([-1])];
            bool var_309_keep_dims_0 = const()[name = string("op_309_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 32, 256, 1]> var_309_cast_fp16 = reduce_sum(axes = var_309_axes_0, keep_dims = var_309_keep_dims_0, x = exp_x_1_cast_fp16)[name = string("op_309_cast_fp16")];
            tensor<fp16, [1, 32, 256, 1024]> var_310_cast_fp16 = real_div(x = exp_x_1_cast_fp16, y = var_309_cast_fp16)[name = string("op_310_cast_fp16")];
            tensor<int32, [3]> concat_12 = const()[name = string("concat_12"), val = tensor<int32, [3]>([32, 256, 1024])];
            tensor<fp16, [32, 256, 1024]> reshape_0_cast_fp16 = reshape(shape = concat_12, x = var_310_cast_fp16)[name = string("reshape_0_cast_fp16")];
            tensor<int32, [3]> concat_13 = const()[name = string("concat_13"), val = tensor<int32, [3]>([32, 1024, 128])];
            tensor<fp16, [32, 1024, 128]> reshape_1_cast_fp16 = reshape(shape = concat_13, x = x_19_cast_fp16)[name = string("reshape_1_cast_fp16")];
            bool matmul_0_transpose_x_0 = const()[name = string("matmul_0_transpose_x_0"), val = bool(false)];
            bool matmul_0_transpose_y_0 = const()[name = string("matmul_0_transpose_y_0"), val = bool(false)];
            tensor<fp16, [32, 256, 128]> matmul_0_cast_fp16 = matmul(transpose_x = matmul_0_transpose_x_0, transpose_y = matmul_0_transpose_y_0, x = reshape_0_cast_fp16, y = reshape_1_cast_fp16)[name = string("matmul_0_cast_fp16")];
            tensor<int32, [4]> concat_17 = const()[name = string("concat_17"), val = tensor<int32, [4]>([1, 32, 256, 128])];
            tensor<fp16, [1, 32, 256, 128]> reshape_2_cast_fp16 = reshape(shape = concat_17, x = matmul_0_cast_fp16)[name = string("reshape_2_cast_fp16")];
            tensor<int32, [4]> var_313_perm_0 = const()[name = string("op_313_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_315 = const()[name = string("op_315"), val = tensor<int32, [3]>([1, 256, 4096])];
            tensor<fp16, [1, 256, 32, 128]> var_313_cast_fp16 = transpose(perm = var_313_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_23")];
            tensor<fp16, [1, 256, 4096]> input_5_cast_fp16 = reshape(shape = var_315, x = var_313_cast_fp16)[name = string("input_5_cast_fp16")];
            tensor<fp16, [4096, 4096]> model_model_layers_12_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4096, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(673590528))), lut = tensor<fp16, [512, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686173504))))[name = string("model_model_layers_12_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
            tensor<fp16, [4096]> linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686239104)))];
            tensor<fp16, [1, 256, 4096]> linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_12_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_5_cast_fp16)[name = string("linear_0_cast_fp16")];
            tensor<fp16, [1, 256, 4096]> hidden_states_5_cast_fp16 = add(x = hidden_states, y = linear_0_cast_fp16)[name = string("hidden_states_5_cast_fp16")];
            tensor<int32, [1]> mean_3_axes_0 = const()[name = string("mean_3_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_3_keep_dims_0 = const()[name = string("mean_3_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 256, 1]> mean_3_cast_fp16 = reduce_mean(axes = mean_3_axes_0, keep_dims = mean_3_keep_dims_0, x = hidden_states_5_cast_fp16)[name = string("mean_3_cast_fp16")];
            tensor<fp16, [1, 256, 4096]> input_7_cast_fp16 = sub(x = hidden_states_5_cast_fp16, y = mean_3_cast_fp16)[name = string("input_7_cast_fp16")];
            tensor<int32, [1]> var_326_axes_0 = const()[name = string("op_326_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [4096]> model_model_layers_12_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686247360)))];
            tensor<fp16, [1, 256, 4096]> var_326_cast_fp16 = layer_norm(axes = var_326_axes_0, epsilon = var_38_to_fp16, gamma = model_model_layers_12_post_attention_layernorm_weight_to_fp16, x = input_7_cast_fp16)[name = string("op_326_cast_fp16")];
            tensor<int32, [3]> var_333 = const()[name = string("op_333"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> input_9_axes_0 = const()[name = string("input_9_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 4096, 256]> var_334 = transpose(perm = var_333, x = var_326_cast_fp16)[name = string("transpose_22")];
            tensor<fp16, [1, 4096, 1, 256]> input_9 = expand_dims(axes = input_9_axes_0, x = var_334)[name = string("input_9")];
            string input_11_pad_type_0 = const()[name = string("input_11_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> input_11_strides_0 = const()[name = string("input_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> input_11_pad_0 = const()[name = string("input_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> input_11_dilations_0 = const()[name = string("input_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 input_11_groups_0 = const()[name = string("input_11_groups_0"), val = int32(1)];
            tensor<fp16, [1, 14336, 1, 256]> input_11 = conv(dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = model_model_layers_12_mlp_gate_proj_weight_palettized, x = input_9)[name = string("input_11")];
            string up_states_1_pad_type_0 = const()[name = string("up_states_1_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> up_states_1_strides_0 = const()[name = string("up_states_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> up_states_1_pad_0 = const()[name = string("up_states_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> up_states_1_dilations_0 = const()[name = string("up_states_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 up_states_1_groups_0 = const()[name = string("up_states_1_groups_0"), val = int32(1)];
            tensor<fp16, [1, 14336, 1, 256]> up_states_1 = conv(dilations = up_states_1_dilations_0, groups = up_states_1_groups_0, pad = up_states_1_pad_0, pad_type = up_states_1_pad_type_0, strides = up_states_1_strides_0, weight = model_model_layers_12_mlp_up_proj_weight_palettized, x = input_9)[name = string("up_states_1")];
            tensor<fp16, [1, 14336, 1, 256]> gate_states_1 = silu(x = input_11)[name = string("gate_states_1")];
            tensor<fp16, [1, 14336, 1, 256]> input_13 = mul(x = gate_states_1, y = up_states_1)[name = string("input_13")];
            string hidden_states_7_pad_type_0 = const()[name = string("hidden_states_7_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> hidden_states_7_strides_0 = const()[name = string("hidden_states_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> hidden_states_7_pad_0 = const()[name = string("hidden_states_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> hidden_states_7_dilations_0 = const()[name = string("hidden_states_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 hidden_states_7_groups_0 = const()[name = string("hidden_states_7_groups_0"), val = int32(1)];
            tensor<fp16, [1, 4096, 1, 256]> hidden_states_7 = conv(dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = model_model_layers_12_mlp_down_proj_weight_palettized, x = input_13)[name = string("hidden_states_7")];
            tensor<int32, [1]> var_356_axes_0 = const()[name = string("op_356_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 4096, 256]> var_356 = squeeze(axes = var_356_axes_0, x = hidden_states_7)[name = string("op_356")];
            tensor<int32, [3]> var_357 = const()[name = string("op_357"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 256, 4096]> var_358 = transpose(perm = var_357, x = var_356)[name = string("transpose_21")];
            tensor<fp16, [1, 256, 4096]> hidden_states_9_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = var_358)[name = string("hidden_states_9_cast_fp16")];
            tensor<int32, [1]> mean_5_axes_0 = const()[name = string("mean_5_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_5_keep_dims_0 = const()[name = string("mean_5_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 256, 1]> mean_5_cast_fp16 = reduce_mean(axes = mean_5_axes_0, keep_dims = mean_5_keep_dims_0, x = hidden_states_9_cast_fp16)[name = string("mean_5_cast_fp16")];
            tensor<fp16, [1, 256, 4096]> input_15_cast_fp16 = sub(x = hidden_states_9_cast_fp16, y = mean_5_cast_fp16)[name = string("input_15_cast_fp16")];
            tensor<int32, [1]> var_366_axes_0 = const()[name = string("op_366_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [4096]> model_model_layers_13_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_input_layernorm_weight_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686255616)))];
            tensor<fp16, [1, 256, 4096]> var_366_cast_fp16 = layer_norm(axes = var_366_axes_0, epsilon = var_38_to_fp16, gamma = model_model_layers_13_input_layernorm_weight_to_fp16, x = input_15_cast_fp16)[name = string("op_366_cast_fp16")];
            tensor<int32, [3]> var_370 = const()[name = string("op_370"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> var_372_axes_0 = const()[name = string("op_372_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 4096, 256]> var_371 = transpose(perm = var_370, x = var_366_cast_fp16)[name = string("transpose_20")];
            tensor<fp16, [1, 4096, 1, 256]> var_372 = expand_dims(axes = var_372_axes_0, x = var_371)[name = string("op_372")];
            string query_states_5_pad_type_0 = const()[name = string("query_states_5_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> query_states_5_strides_0 = const()[name = string("query_states_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> query_states_5_pad_0 = const()[name = string("query_states_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> query_states_5_dilations_0 = const()[name = string("query_states_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 query_states_5_groups_0 = const()[name = string("query_states_5_groups_0"), val = int32(1)];
            tensor<fp16, [1, 4096, 1, 256]> query_states_5 = conv(dilations = query_states_5_dilations_0, groups = query_states_5_groups_0, pad = query_states_5_pad_0, pad_type = query_states_5_pad_type_0, strides = query_states_5_strides_0, weight = model_model_layers_13_self_attn_q_proj_weight_palettized, x = var_372)[name = string("query_states_5")];
            string key_states_7_pad_type_0 = const()[name = string("key_states_7_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> key_states_7_strides_0 = const()[name = string("key_states_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> key_states_7_pad_0 = const()[name = string("key_states_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> key_states_7_dilations_0 = const()[name = string("key_states_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 key_states_7_groups_0 = const()[name = string("key_states_7_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 256]> key_states_7 = conv(dilations = key_states_7_dilations_0, groups = key_states_7_groups_0, pad = key_states_7_pad_0, pad_type = key_states_7_pad_type_0, strides = key_states_7_strides_0, weight = model_model_layers_13_self_attn_k_proj_weight_palettized, x = var_372)[name = string("key_states_7")];
            string value_states_7_pad_type_0 = const()[name = string("value_states_7_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> value_states_7_strides_0 = const()[name = string("value_states_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> value_states_7_pad_0 = const()[name = string("value_states_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> value_states_7_dilations_0 = const()[name = string("value_states_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 value_states_7_groups_0 = const()[name = string("value_states_7_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 256]> value_states_7 = conv(dilations = value_states_7_dilations_0, groups = value_states_7_groups_0, pad = value_states_7_pad_0, pad_type = value_states_7_pad_type_0, strides = value_states_7_strides_0, weight = model_model_layers_13_self_attn_v_proj_weight_palettized, x = var_372)[name = string("value_states_7")];
            tensor<int32, [4]> var_392 = const()[name = string("op_392"), val = tensor<int32, [4]>([1, 32, 128, 256])];
            tensor<fp16, [1, 32, 128, 256]> var_393 = reshape(shape = var_392, x = query_states_5)[name = string("op_393")];
            tensor<int32, [4]> var_394 = const()[name = string("op_394"), val = tensor<int32, [4]>([0, 1, 3, 2])];
            tensor<int32, [4]> var_396 = const()[name = string("op_396"), val = tensor<int32, [4]>([1, 8, 128, 256])];
            tensor<fp16, [1, 8, 128, 256]> var_397 = reshape(shape = var_396, x = key_states_7)[name = string("op_397")];
            tensor<int32, [4]> var_398 = const()[name = string("op_398"), val = tensor<int32, [4]>([0, 1, 3, 2])];
            tensor<int32, [4]> var_400 = const()[name = string("op_400"), val = tensor<int32, [4]>([1, 8, 128, 256])];
            tensor<fp16, [1, 8, 128, 256]> var_401 = reshape(shape = var_400, x = value_states_7)[name = string("op_401")];
            tensor<int32, [4]> var_402 = const()[name = string("op_402"), val = tensor<int32, [4]>([0, 1, 3, 2])];
            tensor<int32, [4]> x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor<int32, [4]>([1, 32, 256, 64])];
            tensor<bool, [4]> x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 32, 256, 128]> x_29 = transpose(perm = var_394, x = var_393)[name = string("transpose_19")];
            tensor<fp16, [1, 32, 256, 64]> x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = x_29)[name = string("x1_5")];
            tensor<int32, [4]> x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor<int32, [4]>([1, 32, 256, 128])];
            tensor<bool, [4]> x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 32, 256, 64]> x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = x_29)[name = string("x2_5")];
            tensor<fp16, [1, 32, 256, 64]> var_420 = mul(x = x1_5, y = cos_7)[name = string("op_420")];
            tensor<fp16, [1, 32, 256, 64]> var_421 = mul(x = x2_5, y = sin_7)[name = string("op_421")];
            tensor<fp16, [1, 32, 256, 64]> var_422 = sub(x = var_420, y = var_421)[name = string("op_422")];
            tensor<fp16, [1, 32, 256, 64]> var_423 = mul(x = x2_5, y = cos_7)[name = string("op_423")];
            tensor<fp16, [1, 32, 256, 64]> var_424 = mul(x = x1_5, y = sin_7)[name = string("op_424")];
            tensor<fp16, [1, 32, 256, 64]> var_425 = add(x = var_423, y = var_424)[name = string("op_425")];
            bool rotated_5_interleave_0 = const()[name = string("rotated_5_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 32, 256, 128]> rotated_5 = concat(axis = var_36, interleave = rotated_5_interleave_0, values = (var_422, var_425))[name = string("rotated_5")];
            tensor<int32, [4]> x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor<int32, [4]>([1, 8, 256, 64])];
            tensor<bool, [4]> x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 8, 256, 128]> x_33 = transpose(perm = var_398, x = var_397)[name = string("transpose_18")];
            tensor<fp16, [1, 8, 256, 64]> x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = x_33)[name = string("x1_7")];
            tensor<int32, [4]> x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor<int32, [4]>([1, 8, 256, 128])];
            tensor<bool, [4]> x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 8, 256, 64]> x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = x_33)[name = string("x2_7")];
            tensor<fp16, [1, 8, 256, 64]> var_441 = mul(x = x1_7, y = cos_7)[name = string("op_441")];
            tensor<fp16, [1, 8, 256, 64]> var_442 = mul(x = x2_7, y = sin_7)[name = string("op_442")];
            tensor<fp16, [1, 8, 256, 64]> var_443 = sub(x = var_441, y = var_442)[name = string("op_443")];
            tensor<fp16, [1, 8, 256, 64]> var_444 = mul(x = x2_7, y = cos_7)[name = string("op_444")];
            tensor<fp16, [1, 8, 256, 64]> var_445 = mul(x = x1_7, y = sin_7)[name = string("op_445")];
            tensor<fp16, [1, 8, 256, 64]> var_446 = add(x = var_444, y = var_445)[name = string("op_446")];
            bool rotated_7_interleave_0 = const()[name = string("rotated_7_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 8, 256, 128]> rotated_7 = concat(axis = var_36, interleave = rotated_7_interleave_0, values = (var_443, var_446))[name = string("rotated_7")];
            tensor<int32, [1]> expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor<int32, [1]>([13])];
            tensor<int32, [1]> expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor<int32, [1]>([14])];
            int32 concat_20_axis_0 = const()[name = string("concat_20_axis_0"), val = int32(0)];
            bool concat_20_interleave_0 = const()[name = string("concat_20_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_20 = concat(axis = concat_20_axis_0, interleave = concat_20_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_20")];
            tensor<int32, [1]> concat_21_values1_0 = const()[name = string("concat_21_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_21_values3_0 = const()[name = string("concat_21_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_21_axis_0 = const()[name = string("concat_21_axis_0"), val = int32(0)];
            bool concat_21_interleave_0 = const()[name = string("concat_21_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_21 = concat(axis = concat_21_axis_0, interleave = concat_21_interleave_0, values = (expand_dims_16, concat_21_values1_0, var_258, concat_21_values3_0))[name = string("concat_21")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [64, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_20, begin_mask = model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_21, end_mask = model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_3_stride_0, update = rotated_7, x = coreml_update_state_9)[name = string("model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_10_write_state")];
            tensor<fp16, [64, 8, 1024, 128]> coreml_update_state_10 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_10")];
            tensor<int32, [1]> expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor<int32, [1]>([45])];
            tensor<int32, [1]> expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor<int32, [1]>([46])];
            int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)];
            bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_24")];
            tensor<int32, [1]> concat_25_values1_0 = const()[name = string("concat_25_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_25_values3_0 = const()[name = string("concat_25_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_25_axis_0 = const()[name = string("concat_25_axis_0"), val = int32(0)];
            bool concat_25_interleave_0 = const()[name = string("concat_25_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_25 = concat(axis = concat_25_axis_0, interleave = concat_25_interleave_0, values = (expand_dims_22, concat_25_values1_0, var_258, concat_25_values3_0))[name = string("concat_25")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [1, 8, 256, 128]> value_states_9 = transpose(perm = var_402, x = var_401)[name = string("transpose_17")];
            tensor<fp16, [64, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_24, begin_mask = model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_25, end_mask = model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_4_stride_0, update = value_states_9, x = coreml_update_state_10)[name = string("model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_11_write_state")];
            tensor<fp16, [64, 8, 1024, 128]> coreml_update_state_11 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_11")];
            tensor<int32, [4]> var_469_begin_0 = const()[name = string("op_469_begin_0"), val = tensor<int32, [4]>([13, 0, 0, 0])];
            tensor<int32, [4]> var_469_end_0 = const()[name = string("op_469_end_0"), val = tensor<int32, [4]>([14, 8, 1024, 128])];
            tensor<bool, [4]> var_469_end_mask_0 = const()[name = string("op_469_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_469_cast_fp16 = slice_by_index(begin = var_469_begin_0, end = var_469_end_0, end_mask = var_469_end_mask_0, x = coreml_update_state_11)[name = string("op_469_cast_fp16")];
            tensor<int32, [1]> K_layer_cache_3_axes_0 = const()[name = string("K_layer_cache_3_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> K_layer_cache_3_cast_fp16 = squeeze(axes = K_layer_cache_3_axes_0, x = var_469_cast_fp16)[name = string("K_layer_cache_3_cast_fp16")];
            tensor<int32, [4]> var_471_begin_0 = const()[name = string("op_471_begin_0"), val = tensor<int32, [4]>([45, 0, 0, 0])];
            tensor<int32, [4]> var_471_end_0 = const()[name = string("op_471_end_0"), val = tensor<int32, [4]>([46, 8, 1024, 128])];
            tensor<bool, [4]> var_471_end_mask_0 = const()[name = string("op_471_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_471_cast_fp16 = slice_by_index(begin = var_471_begin_0, end = var_471_end_0, end_mask = var_471_end_mask_0, x = coreml_update_state_11)[name = string("op_471_cast_fp16")];
            tensor<int32, [1]> V_layer_cache_3_axes_0 = const()[name = string("V_layer_cache_3_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> V_layer_cache_3_cast_fp16 = squeeze(axes = V_layer_cache_3_axes_0, x = var_471_cast_fp16)[name = string("V_layer_cache_3_cast_fp16")];
            tensor<int32, [1]> x_39_axes_0 = const()[name = string("x_39_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_39_cast_fp16 = expand_dims(axes = x_39_axes_0, x = K_layer_cache_3_cast_fp16)[name = string("x_39_cast_fp16")];
            tensor<int32, [4]> var_480 = const()[name = string("op_480"), val = tensor<int32, [4]>([1, 4, 1, 1])];
            tensor<fp16, [8, 4, 1024, 128]> x_41_cast_fp16 = tile(reps = var_480, x = x_39_cast_fp16)[name = string("x_41_cast_fp16")];
            tensor<int32, [4]> var_484 = const()[name = string("op_484"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
            tensor<fp16, [1, 32, 1024, 128]> var_485_cast_fp16 = reshape(shape = var_484, x = x_41_cast_fp16)[name = string("op_485_cast_fp16")];
            tensor<int32, [1]> x_45_axes_0 = const()[name = string("x_45_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_45_cast_fp16 = expand_dims(axes = x_45_axes_0, x = V_layer_cache_3_cast_fp16)[name = string("x_45_cast_fp16")];
            tensor<int32, [4]> var_487 = const()[name = string("op_487"), val = tensor<int32, [4]>([1, 4, 1, 1])];
            tensor<fp16, [8, 4, 1024, 128]> x_47_cast_fp16 = tile(reps = var_487, x = x_45_cast_fp16)[name = string("x_47_cast_fp16")];
            bool var_494_transpose_x_0 = const()[name = string("op_494_transpose_x_0"), val = bool(false)];
            bool var_494_transpose_y_0 = const()[name = string("op_494_transpose_y_0"), val = bool(true)];
            tensor<fp16, [1, 32, 256, 1024]> var_494_cast_fp16 = matmul(transpose_x = var_494_transpose_x_0, transpose_y = var_494_transpose_y_0, x = rotated_5, y = var_485_cast_fp16)[name = string("op_494_cast_fp16")];
            fp16 var_495_to_fp16 = const()[name = string("op_495_to_fp16"), val = fp16(0x1.6ap-4)];
            tensor<fp16, [1, 32, 256, 1024]> attn_weights_3_cast_fp16 = mul(x = var_494_cast_fp16, y = var_495_to_fp16)[name = string("attn_weights_3_cast_fp16")];
            tensor<fp16, [1, 32, 256, 1024]> x_49_cast_fp16 = add(x = attn_weights_3_cast_fp16, y = causal_mask)[name = string("x_49_cast_fp16")];
            tensor<int32, [1]> reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor<int32, [1]>([-1])];
            bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 32, 256, 1]> reduce_max_1_cast_fp16 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_49_cast_fp16)[name = string("reduce_max_1_cast_fp16")];
            tensor<fp16, [1, 32, 256, 1024]> x_51_cast_fp16 = sub(x = x_49_cast_fp16, y = reduce_max_1_cast_fp16)[name = string("x_51_cast_fp16")];
            tensor<fp16, [1, 32, 256, 1024]> exp_x_3_cast_fp16 = exp(x = x_51_cast_fp16)[name = string("exp_x_3_cast_fp16")];
            tensor<int32, [1]> var_506_axes_0 = const()[name = string("op_506_axes_0"), val = tensor<int32, [1]>([-1])];
            bool var_506_keep_dims_0 = const()[name = string("op_506_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 32, 256, 1]> var_506_cast_fp16 = reduce_sum(axes = var_506_axes_0, keep_dims = var_506_keep_dims_0, x = exp_x_3_cast_fp16)[name = string("op_506_cast_fp16")];
            tensor<fp16, [1, 32, 256, 1024]> var_507_cast_fp16 = real_div(x = exp_x_3_cast_fp16, y = var_506_cast_fp16)[name = string("op_507_cast_fp16")];
            tensor<int32, [3]> concat_30 = const()[name = string("concat_30"), val = tensor<int32, [3]>([32, 256, 1024])];
            tensor<fp16, [32, 256, 1024]> reshape_3_cast_fp16 = reshape(shape = concat_30, x = var_507_cast_fp16)[name = string("reshape_3_cast_fp16")];
            tensor<int32, [3]> concat_31 = const()[name = string("concat_31"), val = tensor<int32, [3]>([32, 1024, 128])];
            tensor<fp16, [32, 1024, 128]> reshape_4_cast_fp16 = reshape(shape = concat_31, x = x_47_cast_fp16)[name = string("reshape_4_cast_fp16")];
            bool matmul_1_transpose_x_0 = const()[name = string("matmul_1_transpose_x_0"), val = bool(false)];
            bool matmul_1_transpose_y_0 = const()[name = string("matmul_1_transpose_y_0"), val = bool(false)];
            tensor<fp16, [32, 256, 128]> matmul_1_cast_fp16 = matmul(transpose_x = matmul_1_transpose_x_0, transpose_y = matmul_1_transpose_y_0, x = reshape_3_cast_fp16, y = reshape_4_cast_fp16)[name = string("matmul_1_cast_fp16")];
            tensor<int32, [4]> concat_35 = const()[name = string("concat_35"), val = tensor<int32, [4]>([1, 32, 256, 128])];
            tensor<fp16, [1, 32, 256, 128]> reshape_5_cast_fp16 = reshape(shape = concat_35, x = matmul_1_cast_fp16)[name = string("reshape_5_cast_fp16")];
            tensor<int32, [4]> var_510_perm_0 = const()[name = string("op_510_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_512 = const()[name = string("op_512"), val = tensor<int32, [3]>([1, 256, 4096])];
            tensor<fp16, [1, 256, 32, 128]> var_510_cast_fp16 = transpose(perm = var_510_perm_0, x = reshape_5_cast_fp16)[name = string("transpose_16")];
            tensor<fp16, [1, 256, 4096]> input_19_cast_fp16 = reshape(shape = var_512, x = var_510_cast_fp16)[name = string("input_19_cast_fp16")];
            tensor<fp16, [4096, 4096]> model_model_layers_13_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4096, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(686263872))), lut = tensor<fp16, [512, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698846848))))[name = string("model_model_layers_13_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
            tensor<fp16, [1, 256, 4096]> linear_1_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_13_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_19_cast_fp16)[name = string("linear_1_cast_fp16")];
            tensor<fp16, [1, 256, 4096]> hidden_states_13_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = linear_1_cast_fp16)[name = string("hidden_states_13_cast_fp16")];
            tensor<int32, [1]> mean_7_axes_0 = const()[name = string("mean_7_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_7_keep_dims_0 = const()[name = string("mean_7_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 256, 1]> mean_7_cast_fp16 = reduce_mean(axes = mean_7_axes_0, keep_dims = mean_7_keep_dims_0, x = hidden_states_13_cast_fp16)[name = string("mean_7_cast_fp16")];
            tensor<fp16, [1, 256, 4096]> input_21_cast_fp16 = sub(x = hidden_states_13_cast_fp16, y = mean_7_cast_fp16)[name = string("input_21_cast_fp16")];
            tensor<int32, [1]> var_523_axes_0 = const()[name = string("op_523_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [4096]> model_model_layers_13_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698912448)))];
            tensor<fp16, [1, 256, 4096]> var_523_cast_fp16 = layer_norm(axes = var_523_axes_0, epsilon = var_38_to_fp16, gamma = model_model_layers_13_post_attention_layernorm_weight_to_fp16, x = input_21_cast_fp16)[name = string("op_523_cast_fp16")];
            tensor<int32, [3]> var_530 = const()[name = string("op_530"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> input_23_axes_0 = const()[name = string("input_23_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 4096, 256]> var_531 = transpose(perm = var_530, x = var_523_cast_fp16)[name = string("transpose_15")];
            tensor<fp16, [1, 4096, 1, 256]> input_23 = expand_dims(axes = input_23_axes_0, x = var_531)[name = string("input_23")];
            string input_25_pad_type_0 = const()[name = string("input_25_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> input_25_strides_0 = const()[name = string("input_25_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> input_25_pad_0 = const()[name = string("input_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> input_25_dilations_0 = const()[name = string("input_25_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 input_25_groups_0 = const()[name = string("input_25_groups_0"), val = int32(1)];
            tensor<fp16, [1, 14336, 1, 256]> input_25 = conv(dilations = input_25_dilations_0, groups = input_25_groups_0, pad = input_25_pad_0, pad_type = input_25_pad_type_0, strides = input_25_strides_0, weight = model_model_layers_13_mlp_gate_proj_weight_palettized, x = input_23)[name = string("input_25")];
            string up_states_3_pad_type_0 = const()[name = string("up_states_3_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> up_states_3_strides_0 = const()[name = string("up_states_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> up_states_3_pad_0 = const()[name = string("up_states_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> up_states_3_dilations_0 = const()[name = string("up_states_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 up_states_3_groups_0 = const()[name = string("up_states_3_groups_0"), val = int32(1)];
            tensor<fp16, [1, 14336, 1, 256]> up_states_3 = conv(dilations = up_states_3_dilations_0, groups = up_states_3_groups_0, pad = up_states_3_pad_0, pad_type = up_states_3_pad_type_0, strides = up_states_3_strides_0, weight = model_model_layers_13_mlp_up_proj_weight_palettized, x = input_23)[name = string("up_states_3")];
            tensor<fp16, [1, 14336, 1, 256]> gate_states_3 = silu(x = input_25)[name = string("gate_states_3")];
            tensor<fp16, [1, 14336, 1, 256]> input_27 = mul(x = gate_states_3, y = up_states_3)[name = string("input_27")];
            string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)];
            tensor<fp16, [1, 4096, 1, 256]> hidden_states_15 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = model_model_layers_13_mlp_down_proj_weight_palettized, x = input_27)[name = string("hidden_states_15")];
            tensor<int32, [1]> var_553_axes_0 = const()[name = string("op_553_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 4096, 256]> var_553 = squeeze(axes = var_553_axes_0, x = hidden_states_15)[name = string("op_553")];
            tensor<int32, [3]> var_554 = const()[name = string("op_554"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 256, 4096]> var_555 = transpose(perm = var_554, x = var_553)[name = string("transpose_14")];
            tensor<fp16, [1, 256, 4096]> hidden_states_17_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = var_555)[name = string("hidden_states_17_cast_fp16")];
            tensor<int32, [1]> mean_9_axes_0 = const()[name = string("mean_9_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_9_keep_dims_0 = const()[name = string("mean_9_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 256, 1]> mean_9_cast_fp16 = reduce_mean(axes = mean_9_axes_0, keep_dims = mean_9_keep_dims_0, x = hidden_states_17_cast_fp16)[name = string("mean_9_cast_fp16")];
            tensor<fp16, [1, 256, 4096]> input_29_cast_fp16 = sub(x = hidden_states_17_cast_fp16, y = mean_9_cast_fp16)[name = string("input_29_cast_fp16")];
            tensor<int32, [1]> var_563_axes_0 = const()[name = string("op_563_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [4096]> model_model_layers_14_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_input_layernorm_weight_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698920704)))];
            tensor<fp16, [1, 256, 4096]> var_563_cast_fp16 = layer_norm(axes = var_563_axes_0, epsilon = var_38_to_fp16, gamma = model_model_layers_14_input_layernorm_weight_to_fp16, x = input_29_cast_fp16)[name = string("op_563_cast_fp16")];
            tensor<int32, [3]> var_567 = const()[name = string("op_567"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> var_569_axes_0 = const()[name = string("op_569_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 4096, 256]> var_568 = transpose(perm = var_567, x = var_563_cast_fp16)[name = string("transpose_13")];
            tensor<fp16, [1, 4096, 1, 256]> var_569 = expand_dims(axes = var_569_axes_0, x = var_568)[name = string("op_569")];
            string query_states_9_pad_type_0 = const()[name = string("query_states_9_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> query_states_9_strides_0 = const()[name = string("query_states_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> query_states_9_pad_0 = const()[name = string("query_states_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> query_states_9_dilations_0 = const()[name = string("query_states_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 query_states_9_groups_0 = const()[name = string("query_states_9_groups_0"), val = int32(1)];
            tensor<fp16, [1, 4096, 1, 256]> query_states_9 = conv(dilations = query_states_9_dilations_0, groups = query_states_9_groups_0, pad = query_states_9_pad_0, pad_type = query_states_9_pad_type_0, strides = query_states_9_strides_0, weight = model_model_layers_14_self_attn_q_proj_weight_palettized, x = var_569)[name = string("query_states_9")];
            string key_states_13_pad_type_0 = const()[name = string("key_states_13_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> key_states_13_strides_0 = const()[name = string("key_states_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> key_states_13_pad_0 = const()[name = string("key_states_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> key_states_13_dilations_0 = const()[name = string("key_states_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 key_states_13_groups_0 = const()[name = string("key_states_13_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 256]> key_states_13 = conv(dilations = key_states_13_dilations_0, groups = key_states_13_groups_0, pad = key_states_13_pad_0, pad_type = key_states_13_pad_type_0, strides = key_states_13_strides_0, weight = model_model_layers_14_self_attn_k_proj_weight_palettized, x = var_569)[name = string("key_states_13")];
            string value_states_13_pad_type_0 = const()[name = string("value_states_13_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> value_states_13_strides_0 = const()[name = string("value_states_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> value_states_13_pad_0 = const()[name = string("value_states_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> value_states_13_dilations_0 = const()[name = string("value_states_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 value_states_13_groups_0 = const()[name = string("value_states_13_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 256]> value_states_13 = conv(dilations = value_states_13_dilations_0, groups = value_states_13_groups_0, pad = value_states_13_pad_0, pad_type = value_states_13_pad_type_0, strides = value_states_13_strides_0, weight = model_model_layers_14_self_attn_v_proj_weight_palettized, x = var_569)[name = string("value_states_13")];
            tensor<int32, [4]> var_589 = const()[name = string("op_589"), val = tensor<int32, [4]>([1, 32, 128, 256])];
            tensor<fp16, [1, 32, 128, 256]> var_590 = reshape(shape = var_589, x = query_states_9)[name = string("op_590")];
            tensor<int32, [4]> var_591 = const()[name = string("op_591"), val = tensor<int32, [4]>([0, 1, 3, 2])];
            tensor<int32, [4]> var_593 = const()[name = string("op_593"), val = tensor<int32, [4]>([1, 8, 128, 256])];
            tensor<fp16, [1, 8, 128, 256]> var_594 = reshape(shape = var_593, x = key_states_13)[name = string("op_594")];
            tensor<int32, [4]> var_595 = const()[name = string("op_595"), val = tensor<int32, [4]>([0, 1, 3, 2])];
            tensor<int32, [4]> var_597 = const()[name = string("op_597"), val = tensor<int32, [4]>([1, 8, 128, 256])];
            tensor<fp16, [1, 8, 128, 256]> var_598 = reshape(shape = var_597, x = value_states_13)[name = string("op_598")];
            tensor<int32, [4]> var_599 = const()[name = string("op_599"), val = tensor<int32, [4]>([0, 1, 3, 2])];
            tensor<int32, [4]> x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor<int32, [4]>([1, 32, 256, 64])];
            tensor<bool, [4]> x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 32, 256, 128]> x_57 = transpose(perm = var_591, x = var_590)[name = string("transpose_12")];
            tensor<fp16, [1, 32, 256, 64]> x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = x_57)[name = string("x1_9")];
            tensor<int32, [4]> x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor<int32, [4]>([1, 32, 256, 128])];
            tensor<bool, [4]> x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 32, 256, 64]> x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = x_57)[name = string("x2_9")];
            tensor<fp16, [1, 32, 256, 64]> var_617 = mul(x = x1_9, y = cos_7)[name = string("op_617")];
            tensor<fp16, [1, 32, 256, 64]> var_618 = mul(x = x2_9, y = sin_7)[name = string("op_618")];
            tensor<fp16, [1, 32, 256, 64]> var_619 = sub(x = var_617, y = var_618)[name = string("op_619")];
            tensor<fp16, [1, 32, 256, 64]> var_620 = mul(x = x2_9, y = cos_7)[name = string("op_620")];
            tensor<fp16, [1, 32, 256, 64]> var_621 = mul(x = x1_9, y = sin_7)[name = string("op_621")];
            tensor<fp16, [1, 32, 256, 64]> var_622 = add(x = var_620, y = var_621)[name = string("op_622")];
            bool rotated_9_interleave_0 = const()[name = string("rotated_9_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 32, 256, 128]> rotated_9 = concat(axis = var_36, interleave = rotated_9_interleave_0, values = (var_619, var_622))[name = string("rotated_9")];
            tensor<int32, [4]> x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor<int32, [4]>([1, 8, 256, 64])];
            tensor<bool, [4]> x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 8, 256, 128]> x_61 = transpose(perm = var_595, x = var_594)[name = string("transpose_11")];
            tensor<fp16, [1, 8, 256, 64]> x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = x_61)[name = string("x1_11")];
            tensor<int32, [4]> x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor<int32, [4]>([1, 8, 256, 128])];
            tensor<bool, [4]> x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 8, 256, 64]> x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = x_61)[name = string("x2_11")];
            tensor<fp16, [1, 8, 256, 64]> var_638 = mul(x = x1_11, y = cos_7)[name = string("op_638")];
            tensor<fp16, [1, 8, 256, 64]> var_639 = mul(x = x2_11, y = sin_7)[name = string("op_639")];
            tensor<fp16, [1, 8, 256, 64]> var_640 = sub(x = var_638, y = var_639)[name = string("op_640")];
            tensor<fp16, [1, 8, 256, 64]> var_641 = mul(x = x2_11, y = cos_7)[name = string("op_641")];
            tensor<fp16, [1, 8, 256, 64]> var_642 = mul(x = x1_11, y = sin_7)[name = string("op_642")];
            tensor<fp16, [1, 8, 256, 64]> var_643 = add(x = var_641, y = var_642)[name = string("op_643")];
            bool rotated_11_interleave_0 = const()[name = string("rotated_11_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 8, 256, 128]> rotated_11 = concat(axis = var_36, interleave = rotated_11_interleave_0, values = (var_640, var_643))[name = string("rotated_11")];
            tensor<int32, [1]> expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor<int32, [1]>([14])];
            tensor<int32, [1]> expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor<int32, [1]>([15])];
            int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)];
            bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_38")];
            tensor<int32, [1]> concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)];
            bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_28, concat_39_values1_0, var_258, concat_39_values3_0))[name = string("concat_39")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [64, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_5_stride_0, update = rotated_11, x = coreml_update_state_11)[name = string("model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_12_write_state")];
            tensor<fp16, [64, 8, 1024, 128]> coreml_update_state_12 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_12")];
            tensor<int32, [1]> expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor<int32, [1]>([46])];
            tensor<int32, [1]> expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor<int32, [1]>([47])];
            int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)];
            bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_42")];
            tensor<int32, [1]> concat_43_values1_0 = const()[name = string("concat_43_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)];
            bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_34, concat_43_values1_0, var_258, concat_43_values3_0))[name = string("concat_43")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [1, 8, 256, 128]> value_states_15 = transpose(perm = var_599, x = var_598)[name = string("transpose_10")];
            tensor<fp16, [64, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_6_stride_0, update = value_states_15, x = coreml_update_state_12)[name = string("model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_13_write_state")];
            tensor<fp16, [64, 8, 1024, 128]> coreml_update_state_13 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_13")];
            tensor<int32, [4]> var_666_begin_0 = const()[name = string("op_666_begin_0"), val = tensor<int32, [4]>([14, 0, 0, 0])];
            tensor<int32, [4]> var_666_end_0 = const()[name = string("op_666_end_0"), val = tensor<int32, [4]>([15, 8, 1024, 128])];
            tensor<bool, [4]> var_666_end_mask_0 = const()[name = string("op_666_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_666_cast_fp16 = slice_by_index(begin = var_666_begin_0, end = var_666_end_0, end_mask = var_666_end_mask_0, x = coreml_update_state_13)[name = string("op_666_cast_fp16")];
            tensor<int32, [1]> K_layer_cache_5_axes_0 = const()[name = string("K_layer_cache_5_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> K_layer_cache_5_cast_fp16 = squeeze(axes = K_layer_cache_5_axes_0, x = var_666_cast_fp16)[name = string("K_layer_cache_5_cast_fp16")];
            tensor<int32, [4]> var_668_begin_0 = const()[name = string("op_668_begin_0"), val = tensor<int32, [4]>([46, 0, 0, 0])];
            tensor<int32, [4]> var_668_end_0 = const()[name = string("op_668_end_0"), val = tensor<int32, [4]>([47, 8, 1024, 128])];
            tensor<bool, [4]> var_668_end_mask_0 = const()[name = string("op_668_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_668_cast_fp16 = slice_by_index(begin = var_668_begin_0, end = var_668_end_0, end_mask = var_668_end_mask_0, x = coreml_update_state_13)[name = string("op_668_cast_fp16")];
            tensor<int32, [1]> V_layer_cache_5_axes_0 = const()[name = string("V_layer_cache_5_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> V_layer_cache_5_cast_fp16 = squeeze(axes = V_layer_cache_5_axes_0, x = var_668_cast_fp16)[name = string("V_layer_cache_5_cast_fp16")];
            tensor<int32, [1]> x_67_axes_0 = const()[name = string("x_67_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_67_cast_fp16 = expand_dims(axes = x_67_axes_0, x = K_layer_cache_5_cast_fp16)[name = string("x_67_cast_fp16")];
            tensor<int32, [4]> var_677 = const()[name = string("op_677"), val = tensor<int32, [4]>([1, 4, 1, 1])];
            tensor<fp16, [8, 4, 1024, 128]> x_69_cast_fp16 = tile(reps = var_677, x = x_67_cast_fp16)[name = string("x_69_cast_fp16")];
            tensor<int32, [4]> var_681 = const()[name = string("op_681"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
            tensor<fp16, [1, 32, 1024, 128]> var_682_cast_fp16 = reshape(shape = var_681, x = x_69_cast_fp16)[name = string("op_682_cast_fp16")];
            tensor<int32, [1]> x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = V_layer_cache_5_cast_fp16)[name = string("x_73_cast_fp16")];
            tensor<int32, [4]> var_684 = const()[name = string("op_684"), val = tensor<int32, [4]>([1, 4, 1, 1])];
            tensor<fp16, [8, 4, 1024, 128]> x_75_cast_fp16 = tile(reps = var_684, x = x_73_cast_fp16)[name = string("x_75_cast_fp16")];
            bool var_691_transpose_x_0 = const()[name = string("op_691_transpose_x_0"), val = bool(false)];
            bool var_691_transpose_y_0 = const()[name = string("op_691_transpose_y_0"), val = bool(true)];
            tensor<fp16, [1, 32, 256, 1024]> var_691_cast_fp16 = matmul(transpose_x = var_691_transpose_x_0, transpose_y = var_691_transpose_y_0, x = rotated_9, y = var_682_cast_fp16)[name = string("op_691_cast_fp16")];
            fp16 var_692_to_fp16 = const()[name = string("op_692_to_fp16"), val = fp16(0x1.6ap-4)];
            tensor<fp16, [1, 32, 256, 1024]> attn_weights_5_cast_fp16 = mul(x = var_691_cast_fp16, y = var_692_to_fp16)[name = string("attn_weights_5_cast_fp16")];
            tensor<fp16, [1, 32, 256, 1024]> x_77_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask)[name = string("x_77_cast_fp16")];
            tensor<int32, [1]> reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor<int32, [1]>([-1])];
            bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 32, 256, 1]> reduce_max_2_cast_fp16 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_77_cast_fp16)[name = string("reduce_max_2_cast_fp16")];
            tensor<fp16, [1, 32, 256, 1024]> x_79_cast_fp16 = sub(x = x_77_cast_fp16, y = reduce_max_2_cast_fp16)[name = string("x_79_cast_fp16")];
            tensor<fp16, [1, 32, 256, 1024]> exp_x_5_cast_fp16 = exp(x = x_79_cast_fp16)[name = string("exp_x_5_cast_fp16")];
            tensor<int32, [1]> var_703_axes_0 = const()[name = string("op_703_axes_0"), val = tensor<int32, [1]>([-1])];
            bool var_703_keep_dims_0 = const()[name = string("op_703_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 32, 256, 1]> var_703_cast_fp16 = reduce_sum(axes = var_703_axes_0, keep_dims = var_703_keep_dims_0, x = exp_x_5_cast_fp16)[name = string("op_703_cast_fp16")];
            tensor<fp16, [1, 32, 256, 1024]> var_704_cast_fp16 = real_div(x = exp_x_5_cast_fp16, y = var_703_cast_fp16)[name = string("op_704_cast_fp16")];
            tensor<int32, [3]> concat_48 = const()[name = string("concat_48"), val = tensor<int32, [3]>([32, 256, 1024])];
            tensor<fp16, [32, 256, 1024]> reshape_6_cast_fp16 = reshape(shape = concat_48, x = var_704_cast_fp16)[name = string("reshape_6_cast_fp16")];
            tensor<int32, [3]> concat_49 = const()[name = string("concat_49"), val = tensor<int32, [3]>([32, 1024, 128])];
            tensor<fp16, [32, 1024, 128]> reshape_7_cast_fp16 = reshape(shape = concat_49, x = x_75_cast_fp16)[name = string("reshape_7_cast_fp16")];
            bool matmul_2_transpose_x_0 = const()[name = string("matmul_2_transpose_x_0"), val = bool(false)];
            bool matmul_2_transpose_y_0 = const()[name = string("matmul_2_transpose_y_0"), val = bool(false)];
            tensor<fp16, [32, 256, 128]> matmul_2_cast_fp16 = matmul(transpose_x = matmul_2_transpose_x_0, transpose_y = matmul_2_transpose_y_0, x = reshape_6_cast_fp16, y = reshape_7_cast_fp16)[name = string("matmul_2_cast_fp16")];
            tensor<int32, [4]> concat_53 = const()[name = string("concat_53"), val = tensor<int32, [4]>([1, 32, 256, 128])];
            tensor<fp16, [1, 32, 256, 128]> reshape_8_cast_fp16 = reshape(shape = concat_53, x = matmul_2_cast_fp16)[name = string("reshape_8_cast_fp16")];
            tensor<int32, [4]> var_707_perm_0 = const()[name = string("op_707_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_709 = const()[name = string("op_709"), val = tensor<int32, [3]>([1, 256, 4096])];
            tensor<fp16, [1, 256, 32, 128]> var_707_cast_fp16 = transpose(perm = var_707_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_9")];
            tensor<fp16, [1, 256, 4096]> input_33_cast_fp16 = reshape(shape = var_709, x = var_707_cast_fp16)[name = string("input_33_cast_fp16")];
            tensor<fp16, [4096, 4096]> model_model_layers_14_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4096, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(698928960))), lut = tensor<fp16, [512, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711511936))))[name = string("model_model_layers_14_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
            tensor<fp16, [1, 256, 4096]> linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_14_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_33_cast_fp16)[name = string("linear_2_cast_fp16")];
            tensor<fp16, [1, 256, 4096]> hidden_states_21_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = linear_2_cast_fp16)[name = string("hidden_states_21_cast_fp16")];
            tensor<int32, [1]> mean_11_axes_0 = const()[name = string("mean_11_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_11_keep_dims_0 = const()[name = string("mean_11_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 256, 1]> mean_11_cast_fp16 = reduce_mean(axes = mean_11_axes_0, keep_dims = mean_11_keep_dims_0, x = hidden_states_21_cast_fp16)[name = string("mean_11_cast_fp16")];
            tensor<fp16, [1, 256, 4096]> input_35_cast_fp16 = sub(x = hidden_states_21_cast_fp16, y = mean_11_cast_fp16)[name = string("input_35_cast_fp16")];
            tensor<int32, [1]> var_720_axes_0 = const()[name = string("op_720_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [4096]> model_model_layers_14_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711577536)))];
            tensor<fp16, [1, 256, 4096]> var_720_cast_fp16 = layer_norm(axes = var_720_axes_0, epsilon = var_38_to_fp16, gamma = model_model_layers_14_post_attention_layernorm_weight_to_fp16, x = input_35_cast_fp16)[name = string("op_720_cast_fp16")];
            tensor<int32, [3]> var_727 = const()[name = string("op_727"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 4096, 256]> var_728 = transpose(perm = var_727, x = var_720_cast_fp16)[name = string("transpose_8")];
            tensor<fp16, [1, 4096, 1, 256]> input_37 = expand_dims(axes = input_37_axes_0, x = var_728)[name = string("input_37")];
            string input_39_pad_type_0 = const()[name = string("input_39_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> input_39_strides_0 = const()[name = string("input_39_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> input_39_pad_0 = const()[name = string("input_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> input_39_dilations_0 = const()[name = string("input_39_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 input_39_groups_0 = const()[name = string("input_39_groups_0"), val = int32(1)];
            tensor<fp16, [1, 14336, 1, 256]> input_39 = conv(dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = model_model_layers_14_mlp_gate_proj_weight_palettized, x = input_37)[name = string("input_39")];
            string up_states_5_pad_type_0 = const()[name = string("up_states_5_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> up_states_5_strides_0 = const()[name = string("up_states_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> up_states_5_pad_0 = const()[name = string("up_states_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> up_states_5_dilations_0 = const()[name = string("up_states_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 up_states_5_groups_0 = const()[name = string("up_states_5_groups_0"), val = int32(1)];
            tensor<fp16, [1, 14336, 1, 256]> up_states_5 = conv(dilations = up_states_5_dilations_0, groups = up_states_5_groups_0, pad = up_states_5_pad_0, pad_type = up_states_5_pad_type_0, strides = up_states_5_strides_0, weight = model_model_layers_14_mlp_up_proj_weight_palettized, x = input_37)[name = string("up_states_5")];
            tensor<fp16, [1, 14336, 1, 256]> gate_states_5 = silu(x = input_39)[name = string("gate_states_5")];
            tensor<fp16, [1, 14336, 1, 256]> input_41 = mul(x = gate_states_5, y = up_states_5)[name = string("input_41")];
            string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)];
            tensor<fp16, [1, 4096, 1, 256]> hidden_states_23 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = model_model_layers_14_mlp_down_proj_weight_palettized, x = input_41)[name = string("hidden_states_23")];
            tensor<int32, [1]> var_750_axes_0 = const()[name = string("op_750_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 4096, 256]> var_750 = squeeze(axes = var_750_axes_0, x = hidden_states_23)[name = string("op_750")];
            tensor<int32, [3]> var_751 = const()[name = string("op_751"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 256, 4096]> var_752 = transpose(perm = var_751, x = var_750)[name = string("transpose_7")];
            tensor<fp16, [1, 256, 4096]> hidden_states_25_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = var_752)[name = string("hidden_states_25_cast_fp16")];
            tensor<int32, [1]> mean_13_axes_0 = const()[name = string("mean_13_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_13_keep_dims_0 = const()[name = string("mean_13_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 256, 1]> mean_13_cast_fp16 = reduce_mean(axes = mean_13_axes_0, keep_dims = mean_13_keep_dims_0, x = hidden_states_25_cast_fp16)[name = string("mean_13_cast_fp16")];
            tensor<fp16, [1, 256, 4096]> input_43_cast_fp16 = sub(x = hidden_states_25_cast_fp16, y = mean_13_cast_fp16)[name = string("input_43_cast_fp16")];
            tensor<int32, [1]> var_760_axes_0 = const()[name = string("op_760_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [4096]> model_model_layers_15_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_input_layernorm_weight_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711585792)))];
            tensor<fp16, [1, 256, 4096]> var_760_cast_fp16 = layer_norm(axes = var_760_axes_0, epsilon = var_38_to_fp16, gamma = model_model_layers_15_input_layernorm_weight_to_fp16, x = input_43_cast_fp16)[name = string("op_760_cast_fp16")];
            tensor<int32, [3]> var_764 = const()[name = string("op_764"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> var_766_axes_0 = const()[name = string("op_766_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 4096, 256]> var_765 = transpose(perm = var_764, x = var_760_cast_fp16)[name = string("transpose_6")];
            tensor<fp16, [1, 4096, 1, 256]> var_766 = expand_dims(axes = var_766_axes_0, x = var_765)[name = string("op_766")];
            string query_states_13_pad_type_0 = const()[name = string("query_states_13_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> query_states_13_strides_0 = const()[name = string("query_states_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> query_states_13_pad_0 = const()[name = string("query_states_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> query_states_13_dilations_0 = const()[name = string("query_states_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 query_states_13_groups_0 = const()[name = string("query_states_13_groups_0"), val = int32(1)];
            tensor<fp16, [1, 4096, 1, 256]> query_states_13 = conv(dilations = query_states_13_dilations_0, groups = query_states_13_groups_0, pad = query_states_13_pad_0, pad_type = query_states_13_pad_type_0, strides = query_states_13_strides_0, weight = model_model_layers_15_self_attn_q_proj_weight_palettized, x = var_766)[name = string("query_states_13")];
            string key_states_19_pad_type_0 = const()[name = string("key_states_19_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> key_states_19_strides_0 = const()[name = string("key_states_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> key_states_19_pad_0 = const()[name = string("key_states_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> key_states_19_dilations_0 = const()[name = string("key_states_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 key_states_19_groups_0 = const()[name = string("key_states_19_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 256]> key_states_19 = conv(dilations = key_states_19_dilations_0, groups = key_states_19_groups_0, pad = key_states_19_pad_0, pad_type = key_states_19_pad_type_0, strides = key_states_19_strides_0, weight = model_model_layers_15_self_attn_k_proj_weight_palettized, x = var_766)[name = string("key_states_19")];
            string value_states_19_pad_type_0 = const()[name = string("value_states_19_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> value_states_19_strides_0 = const()[name = string("value_states_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> value_states_19_pad_0 = const()[name = string("value_states_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> value_states_19_dilations_0 = const()[name = string("value_states_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 value_states_19_groups_0 = const()[name = string("value_states_19_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 256]> value_states_19 = conv(dilations = value_states_19_dilations_0, groups = value_states_19_groups_0, pad = value_states_19_pad_0, pad_type = value_states_19_pad_type_0, strides = value_states_19_strides_0, weight = model_model_layers_15_self_attn_v_proj_weight_palettized, x = var_766)[name = string("value_states_19")];
            tensor<int32, [4]> var_786 = const()[name = string("op_786"), val = tensor<int32, [4]>([1, 32, 128, 256])];
            tensor<fp16, [1, 32, 128, 256]> var_787 = reshape(shape = var_786, x = query_states_13)[name = string("op_787")];
            tensor<int32, [4]> var_788 = const()[name = string("op_788"), val = tensor<int32, [4]>([0, 1, 3, 2])];
            tensor<int32, [4]> var_790 = const()[name = string("op_790"), val = tensor<int32, [4]>([1, 8, 128, 256])];
            tensor<fp16, [1, 8, 128, 256]> var_791 = reshape(shape = var_790, x = key_states_19)[name = string("op_791")];
            tensor<int32, [4]> var_792 = const()[name = string("op_792"), val = tensor<int32, [4]>([0, 1, 3, 2])];
            tensor<int32, [4]> var_794 = const()[name = string("op_794"), val = tensor<int32, [4]>([1, 8, 128, 256])];
            tensor<fp16, [1, 8, 128, 256]> var_795 = reshape(shape = var_794, x = value_states_19)[name = string("op_795")];
            tensor<int32, [4]> var_796 = const()[name = string("op_796"), val = tensor<int32, [4]>([0, 1, 3, 2])];
            tensor<int32, [4]> x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor<int32, [4]>([1, 32, 256, 64])];
            tensor<bool, [4]> x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 32, 256, 128]> x_85 = transpose(perm = var_788, x = var_787)[name = string("transpose_5")];
            tensor<fp16, [1, 32, 256, 64]> x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = x_85)[name = string("x1_13")];
            tensor<int32, [4]> x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor<int32, [4]>([1, 32, 256, 128])];
            tensor<bool, [4]> x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 32, 256, 64]> x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = x_85)[name = string("x2_13")];
            tensor<fp16, [1, 32, 256, 64]> var_814 = mul(x = x1_13, y = cos_7)[name = string("op_814")];
            tensor<fp16, [1, 32, 256, 64]> var_815 = mul(x = x2_13, y = sin_7)[name = string("op_815")];
            tensor<fp16, [1, 32, 256, 64]> var_816 = sub(x = var_814, y = var_815)[name = string("op_816")];
            tensor<fp16, [1, 32, 256, 64]> var_817 = mul(x = x2_13, y = cos_7)[name = string("op_817")];
            tensor<fp16, [1, 32, 256, 64]> var_818 = mul(x = x1_13, y = sin_7)[name = string("op_818")];
            tensor<fp16, [1, 32, 256, 64]> var_819 = add(x = var_817, y = var_818)[name = string("op_819")];
            bool rotated_13_interleave_0 = const()[name = string("rotated_13_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 32, 256, 128]> rotated_13 = concat(axis = var_36, interleave = rotated_13_interleave_0, values = (var_816, var_819))[name = string("rotated_13")];
            tensor<int32, [4]> x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_end_0 = const()[name = string("x1_end_0"), val = tensor<int32, [4]>([1, 8, 256, 64])];
            tensor<bool, [4]> x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 8, 256, 128]> x_89 = transpose(perm = var_792, x = var_791)[name = string("transpose_4")];
            tensor<fp16, [1, 8, 256, 64]> x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = x_89)[name = string("x1")];
            tensor<int32, [4]> x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_end_0 = const()[name = string("x2_end_0"), val = tensor<int32, [4]>([1, 8, 256, 128])];
            tensor<bool, [4]> x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 8, 256, 64]> x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = x_89)[name = string("x2")];
            tensor<fp16, [1, 8, 256, 64]> var_835 = mul(x = x1, y = cos_7)[name = string("op_835")];
            tensor<fp16, [1, 8, 256, 64]> var_836 = mul(x = x2, y = sin_7)[name = string("op_836")];
            tensor<fp16, [1, 8, 256, 64]> var_837 = sub(x = var_835, y = var_836)[name = string("op_837")];
            tensor<fp16, [1, 8, 256, 64]> var_838 = mul(x = x2, y = cos_7)[name = string("op_838")];
            tensor<fp16, [1, 8, 256, 64]> var_839 = mul(x = x1, y = sin_7)[name = string("op_839")];
            tensor<fp16, [1, 8, 256, 64]> var_840 = add(x = var_838, y = var_839)[name = string("op_840")];
            bool rotated_interleave_0 = const()[name = string("rotated_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 8, 256, 128]> rotated = concat(axis = var_36, interleave = rotated_interleave_0, values = (var_837, var_840))[name = string("rotated")];
            tensor<int32, [1]> expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor<int32, [1]>([15])];
            tensor<int32, [1]> expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor<int32, [1]>([16])];
            int32 concat_56_axis_0 = const()[name = string("concat_56_axis_0"), val = int32(0)];
            bool concat_56_interleave_0 = const()[name = string("concat_56_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_56 = concat(axis = concat_56_axis_0, interleave = concat_56_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_56")];
            tensor<int32, [1]> concat_57_values1_0 = const()[name = string("concat_57_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_57_values3_0 = const()[name = string("concat_57_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_57_axis_0 = const()[name = string("concat_57_axis_0"), val = int32(0)];
            bool concat_57_interleave_0 = const()[name = string("concat_57_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_57 = concat(axis = concat_57_axis_0, interleave = concat_57_interleave_0, values = (expand_dims_40, concat_57_values1_0, var_258, concat_57_values3_0))[name = string("concat_57")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [64, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_56, begin_mask = model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_57, end_mask = model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_7_stride_0, update = rotated, x = coreml_update_state_13)[name = string("model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_14_write_state")];
            tensor<fp16, [64, 8, 1024, 128]> coreml_update_state_14 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_14")];
            tensor<int32, [1]> expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor<int32, [1]>([47])];
            tensor<int32, [1]> expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor<int32, [1]>([48])];
            int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)];
            bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_60")];
            tensor<int32, [1]> concat_61_values1_0 = const()[name = string("concat_61_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_61_values3_0 = const()[name = string("concat_61_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_61_axis_0 = const()[name = string("concat_61_axis_0"), val = int32(0)];
            bool concat_61_interleave_0 = const()[name = string("concat_61_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_61 = concat(axis = concat_61_axis_0, interleave = concat_61_interleave_0, values = (expand_dims_46, concat_61_values1_0, var_258, concat_61_values3_0))[name = string("concat_61")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [1, 8, 256, 128]> value_states_21 = transpose(perm = var_796, x = var_795)[name = string("transpose_3")];
            tensor<fp16, [64, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_60, begin_mask = model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_61, end_mask = model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_8_stride_0, update = value_states_21, x = coreml_update_state_14)[name = string("model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_15_write_state")];
            tensor<fp16, [64, 8, 1024, 128]> coreml_update_state_15 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_15")];
            tensor<int32, [4]> var_863_begin_0 = const()[name = string("op_863_begin_0"), val = tensor<int32, [4]>([15, 0, 0, 0])];
            tensor<int32, [4]> var_863_end_0 = const()[name = string("op_863_end_0"), val = tensor<int32, [4]>([16, 8, 1024, 128])];
            tensor<bool, [4]> var_863_end_mask_0 = const()[name = string("op_863_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_863_cast_fp16 = slice_by_index(begin = var_863_begin_0, end = var_863_end_0, end_mask = var_863_end_mask_0, x = coreml_update_state_15)[name = string("op_863_cast_fp16")];
            tensor<int32, [1]> K_layer_cache_axes_0 = const()[name = string("K_layer_cache_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> K_layer_cache_cast_fp16 = squeeze(axes = K_layer_cache_axes_0, x = var_863_cast_fp16)[name = string("K_layer_cache_cast_fp16")];
            tensor<int32, [4]> var_865_begin_0 = const()[name = string("op_865_begin_0"), val = tensor<int32, [4]>([47, 0, 0, 0])];
            tensor<int32, [4]> var_865_end_0 = const()[name = string("op_865_end_0"), val = tensor<int32, [4]>([48, 8, 1024, 128])];
            tensor<bool, [4]> var_865_end_mask_0 = const()[name = string("op_865_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_865_cast_fp16 = slice_by_index(begin = var_865_begin_0, end = var_865_end_0, end_mask = var_865_end_mask_0, x = coreml_update_state_15)[name = string("op_865_cast_fp16")];
            tensor<int32, [1]> V_layer_cache_axes_0 = const()[name = string("V_layer_cache_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> V_layer_cache_cast_fp16 = squeeze(axes = V_layer_cache_axes_0, x = var_865_cast_fp16)[name = string("V_layer_cache_cast_fp16")];
            tensor<int32, [1]> x_95_axes_0 = const()[name = string("x_95_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_95_cast_fp16 = expand_dims(axes = x_95_axes_0, x = K_layer_cache_cast_fp16)[name = string("x_95_cast_fp16")];
            tensor<int32, [4]> var_874 = const()[name = string("op_874"), val = tensor<int32, [4]>([1, 4, 1, 1])];
            tensor<fp16, [8, 4, 1024, 128]> x_97_cast_fp16 = tile(reps = var_874, x = x_95_cast_fp16)[name = string("x_97_cast_fp16")];
            tensor<int32, [4]> var_878 = const()[name = string("op_878"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
            tensor<fp16, [1, 32, 1024, 128]> var_879_cast_fp16 = reshape(shape = var_878, x = x_97_cast_fp16)[name = string("op_879_cast_fp16")];
            tensor<int32, [1]> x_101_axes_0 = const()[name = string("x_101_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_101_cast_fp16 = expand_dims(axes = x_101_axes_0, x = V_layer_cache_cast_fp16)[name = string("x_101_cast_fp16")];
            tensor<int32, [4]> var_881 = const()[name = string("op_881"), val = tensor<int32, [4]>([1, 4, 1, 1])];
            tensor<fp16, [8, 4, 1024, 128]> x_103_cast_fp16 = tile(reps = var_881, x = x_101_cast_fp16)[name = string("x_103_cast_fp16")];
            bool var_888_transpose_x_0 = const()[name = string("op_888_transpose_x_0"), val = bool(false)];
            bool var_888_transpose_y_0 = const()[name = string("op_888_transpose_y_0"), val = bool(true)];
            tensor<fp16, [1, 32, 256, 1024]> var_888_cast_fp16 = matmul(transpose_x = var_888_transpose_x_0, transpose_y = var_888_transpose_y_0, x = rotated_13, y = var_879_cast_fp16)[name = string("op_888_cast_fp16")];
            fp16 var_889_to_fp16 = const()[name = string("op_889_to_fp16"), val = fp16(0x1.6ap-4)];
            tensor<fp16, [1, 32, 256, 1024]> attn_weights_cast_fp16 = mul(x = var_888_cast_fp16, y = var_889_to_fp16)[name = string("attn_weights_cast_fp16")];
            tensor<fp16, [1, 32, 256, 1024]> x_105_cast_fp16 = add(x = attn_weights_cast_fp16, y = causal_mask)[name = string("x_105_cast_fp16")];
            tensor<int32, [1]> reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor<int32, [1]>([-1])];
            bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 32, 256, 1]> reduce_max_3_cast_fp16 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_105_cast_fp16)[name = string("reduce_max_3_cast_fp16")];
            tensor<fp16, [1, 32, 256, 1024]> x_107_cast_fp16 = sub(x = x_105_cast_fp16, y = reduce_max_3_cast_fp16)[name = string("x_107_cast_fp16")];
            tensor<fp16, [1, 32, 256, 1024]> exp_x_cast_fp16 = exp(x = x_107_cast_fp16)[name = string("exp_x_cast_fp16")];
            tensor<int32, [1]> var_900_axes_0 = const()[name = string("op_900_axes_0"), val = tensor<int32, [1]>([-1])];
            bool var_900_keep_dims_0 = const()[name = string("op_900_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 32, 256, 1]> var_900_cast_fp16 = reduce_sum(axes = var_900_axes_0, keep_dims = var_900_keep_dims_0, x = exp_x_cast_fp16)[name = string("op_900_cast_fp16")];
            tensor<fp16, [1, 32, 256, 1024]> var_901_cast_fp16 = real_div(x = exp_x_cast_fp16, y = var_900_cast_fp16)[name = string("op_901_cast_fp16")];
            tensor<int32, [3]> concat_66 = const()[name = string("concat_66"), val = tensor<int32, [3]>([32, 256, 1024])];
            tensor<fp16, [32, 256, 1024]> reshape_9_cast_fp16 = reshape(shape = concat_66, x = var_901_cast_fp16)[name = string("reshape_9_cast_fp16")];
            tensor<int32, [3]> concat_67 = const()[name = string("concat_67"), val = tensor<int32, [3]>([32, 1024, 128])];
            tensor<fp16, [32, 1024, 128]> reshape_10_cast_fp16 = reshape(shape = concat_67, x = x_103_cast_fp16)[name = string("reshape_10_cast_fp16")];
            bool matmul_3_transpose_x_0 = const()[name = string("matmul_3_transpose_x_0"), val = bool(false)];
            bool matmul_3_transpose_y_0 = const()[name = string("matmul_3_transpose_y_0"), val = bool(false)];
            tensor<fp16, [32, 256, 128]> matmul_3_cast_fp16 = matmul(transpose_x = matmul_3_transpose_x_0, transpose_y = matmul_3_transpose_y_0, x = reshape_9_cast_fp16, y = reshape_10_cast_fp16)[name = string("matmul_3_cast_fp16")];
            tensor<int32, [4]> concat_71 = const()[name = string("concat_71"), val = tensor<int32, [4]>([1, 32, 256, 128])];
            tensor<fp16, [1, 32, 256, 128]> reshape_11_cast_fp16 = reshape(shape = concat_71, x = matmul_3_cast_fp16)[name = string("reshape_11_cast_fp16")];
            tensor<int32, [4]> var_904_perm_0 = const()[name = string("op_904_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_906 = const()[name = string("op_906"), val = tensor<int32, [3]>([1, 256, 4096])];
            tensor<fp16, [1, 256, 32, 128]> var_904_cast_fp16 = transpose(perm = var_904_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_2")];
            tensor<fp16, [1, 256, 4096]> input_47_cast_fp16 = reshape(shape = var_906, x = var_904_cast_fp16)[name = string("input_47_cast_fp16")];
            tensor<fp16, [4096, 4096]> model_model_layers_15_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint6, [4096, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(711594048))), lut = tensor<fp16, [512, 1, 64, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(724177024))))[name = string("model_model_layers_15_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
            tensor<fp16, [1, 256, 4096]> linear_3_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_15_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_47_cast_fp16)[name = string("linear_3_cast_fp16")];
            tensor<fp16, [1, 256, 4096]> hidden_states_29_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = linear_3_cast_fp16)[name = string("hidden_states_29_cast_fp16")];
            tensor<int32, [1]> mean_axes_0 = const()[name = string("mean_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_keep_dims_0 = const()[name = string("mean_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 256, 1]> mean_cast_fp16 = reduce_mean(axes = mean_axes_0, keep_dims = mean_keep_dims_0, x = hidden_states_29_cast_fp16)[name = string("mean_cast_fp16")];
            tensor<fp16, [1, 256, 4096]> input_49_cast_fp16 = sub(x = hidden_states_29_cast_fp16, y = mean_cast_fp16)[name = string("input_49_cast_fp16")];
            tensor<int32, [1]> var_917_axes_0 = const()[name = string("op_917_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [4096]> model_model_layers_15_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(724242624)))];
            tensor<fp16, [1, 256, 4096]> var_917_cast_fp16 = layer_norm(axes = var_917_axes_0, epsilon = var_38_to_fp16, gamma = model_model_layers_15_post_attention_layernorm_weight_to_fp16, x = input_49_cast_fp16)[name = string("op_917_cast_fp16")];
            tensor<int32, [3]> var_924 = const()[name = string("op_924"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> input_51_axes_0 = const()[name = string("input_51_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 4096, 256]> var_925 = transpose(perm = var_924, x = var_917_cast_fp16)[name = string("transpose_1")];
            tensor<fp16, [1, 4096, 1, 256]> input_51 = expand_dims(axes = input_51_axes_0, x = var_925)[name = string("input_51")];
            string input_53_pad_type_0 = const()[name = string("input_53_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> input_53_strides_0 = const()[name = string("input_53_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> input_53_pad_0 = const()[name = string("input_53_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> input_53_dilations_0 = const()[name = string("input_53_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 input_53_groups_0 = const()[name = string("input_53_groups_0"), val = int32(1)];
            tensor<fp16, [1, 14336, 1, 256]> input_53 = conv(dilations = input_53_dilations_0, groups = input_53_groups_0, pad = input_53_pad_0, pad_type = input_53_pad_type_0, strides = input_53_strides_0, weight = model_model_layers_15_mlp_gate_proj_weight_palettized, x = input_51)[name = string("input_53")];
            string up_states_pad_type_0 = const()[name = string("up_states_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> up_states_strides_0 = const()[name = string("up_states_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> up_states_pad_0 = const()[name = string("up_states_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> up_states_dilations_0 = const()[name = string("up_states_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 up_states_groups_0 = const()[name = string("up_states_groups_0"), val = int32(1)];
            tensor<fp16, [1, 14336, 1, 256]> up_states = conv(dilations = up_states_dilations_0, groups = up_states_groups_0, pad = up_states_pad_0, pad_type = up_states_pad_type_0, strides = up_states_strides_0, weight = model_model_layers_15_mlp_up_proj_weight_palettized, x = input_51)[name = string("up_states")];
            tensor<fp16, [1, 14336, 1, 256]> gate_states = silu(x = input_53)[name = string("gate_states")];
            tensor<fp16, [1, 14336, 1, 256]> input = mul(x = gate_states, y = up_states)[name = string("input")];
            string hidden_states_pad_type_0 = const()[name = string("hidden_states_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> hidden_states_strides_0 = const()[name = string("hidden_states_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> hidden_states_pad_0 = const()[name = string("hidden_states_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> hidden_states_dilations_0 = const()[name = string("hidden_states_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 hidden_states_groups_0 = const()[name = string("hidden_states_groups_0"), val = int32(1)];
            tensor<fp16, [1, 4096, 1, 256]> hidden_states_1 = conv(dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = model_model_layers_15_mlp_down_proj_weight_palettized, x = input)[name = string("hidden_states")];
            tensor<int32, [1]> var_947_axes_0 = const()[name = string("op_947_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 4096, 256]> var_947 = squeeze(axes = var_947_axes_0, x = hidden_states_1)[name = string("op_947")];
            tensor<int32, [3]> var_948 = const()[name = string("op_948"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 256, 4096]> var_949 = transpose(perm = var_948, x = var_947)[name = string("transpose_0")];
            tensor<fp16, [1, 256, 4096]> output_hidden_states = add(x = hidden_states_29_cast_fp16, y = var_949)[name = string("op_950_cast_fp16")];
        } -> (output_hidden_states);
}