anemll's picture
Upload folder using huggingface_hub
f9662c8 verified
raw
history blame
473 kB
program(1.3)
[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3404.16.1"}, {"coremlc-version", "3404.23.1"}})]
{
func infer<ios18>(tensor<fp16, [1, 1, 1, 1024]> causal_mask, tensor<int32, [1]> current_pos, tensor<fp16, [1, 1, 3072]> hidden_states, state<tensor<fp16, [56, 8, 1024, 128]>> model_model_kv_cache_0, tensor<int32, [1]> position_ids) {
tensor<fp16, [3072, 3072, 1, 1]> model_model_layers_14_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4718720))))[name = string("model_model_layers_14_self_attn_q_proj_weight_palettized")];
tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_14_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4731072))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6304000))))[name = string("model_model_layers_14_self_attn_k_proj_weight_palettized")];
tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_14_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6308160))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7881088))))[name = string("model_model_layers_14_self_attn_v_proj_weight_palettized")];
tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_14_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7885248))), lut = tensor<fp16, [1024, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20468224))))[name = string("model_model_layers_14_mlp_gate_proj_weight_palettized")];
tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_14_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20501056))), lut = tensor<fp16, [1024, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33084032))))[name = string("model_model_layers_14_mlp_up_proj_weight_palettized")];
tensor<fp16, [3072, 8192, 1, 1]> model_model_layers_14_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 8192, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33116864))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45699840))))[name = string("model_model_layers_14_mlp_down_proj_weight_palettized")];
tensor<fp16, [3072, 3072, 1, 1]> model_model_layers_15_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45712192))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50430848))))[name = string("model_model_layers_15_self_attn_q_proj_weight_palettized")];
tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_15_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50443200))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52016128))))[name = string("model_model_layers_15_self_attn_k_proj_weight_palettized")];
tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_15_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52020288))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53593216))))[name = string("model_model_layers_15_self_attn_v_proj_weight_palettized")];
tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_15_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53597376))), lut = tensor<fp16, [1024, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66180352))))[name = string("model_model_layers_15_mlp_gate_proj_weight_palettized")];
tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_15_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66213184))), lut = tensor<fp16, [1024, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78796160))))[name = string("model_model_layers_15_mlp_up_proj_weight_palettized")];
tensor<fp16, [3072, 8192, 1, 1]> model_model_layers_15_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 8192, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78828992))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91411968))))[name = string("model_model_layers_15_mlp_down_proj_weight_palettized")];
tensor<fp16, [3072, 3072, 1, 1]> model_model_layers_16_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91424320))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96142976))))[name = string("model_model_layers_16_self_attn_q_proj_weight_palettized")];
tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_16_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96155328))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97728256))))[name = string("model_model_layers_16_self_attn_k_proj_weight_palettized")];
tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_16_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97732416))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99305344))))[name = string("model_model_layers_16_self_attn_v_proj_weight_palettized")];
tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_16_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99309504))), lut = tensor<fp16, [1024, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111892480))))[name = string("model_model_layers_16_mlp_gate_proj_weight_palettized")];
tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_16_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111925312))), lut = tensor<fp16, [1024, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124508288))))[name = string("model_model_layers_16_mlp_up_proj_weight_palettized")];
tensor<fp16, [3072, 8192, 1, 1]> model_model_layers_16_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 8192, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124541120))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137124096))))[name = string("model_model_layers_16_mlp_down_proj_weight_palettized")];
tensor<fp16, [3072, 3072, 1, 1]> model_model_layers_17_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137136448))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141855104))))[name = string("model_model_layers_17_self_attn_q_proj_weight_palettized")];
tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_17_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141867456))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143440384))))[name = string("model_model_layers_17_self_attn_k_proj_weight_palettized")];
tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_17_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143444544))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145017472))))[name = string("model_model_layers_17_self_attn_v_proj_weight_palettized")];
tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_17_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145021632))), lut = tensor<fp16, [1024, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157604608))))[name = string("model_model_layers_17_mlp_gate_proj_weight_palettized")];
tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_17_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157637440))), lut = tensor<fp16, [1024, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170220416))))[name = string("model_model_layers_17_mlp_up_proj_weight_palettized")];
tensor<fp16, [3072, 8192, 1, 1]> model_model_layers_17_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 8192, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170253248))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182836224))))[name = string("model_model_layers_17_mlp_down_proj_weight_palettized")];
tensor<fp16, [3072, 3072, 1, 1]> model_model_layers_18_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182848576))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187567232))))[name = string("model_model_layers_18_self_attn_q_proj_weight_palettized")];
tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_18_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187579584))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189152512))))[name = string("model_model_layers_18_self_attn_k_proj_weight_palettized")];
tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_18_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189156672))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190729600))))[name = string("model_model_layers_18_self_attn_v_proj_weight_palettized")];
tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_18_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190733760))), lut = tensor<fp16, [1024, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203316736))))[name = string("model_model_layers_18_mlp_gate_proj_weight_palettized")];
tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_18_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203349568))), lut = tensor<fp16, [1024, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215932544))))[name = string("model_model_layers_18_mlp_up_proj_weight_palettized")];
tensor<fp16, [3072, 8192, 1, 1]> model_model_layers_18_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 8192, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215965376))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228548352))))[name = string("model_model_layers_18_mlp_down_proj_weight_palettized")];
tensor<fp16, [3072, 3072, 1, 1]> model_model_layers_19_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228560704))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233279360))))[name = string("model_model_layers_19_self_attn_q_proj_weight_palettized")];
tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_19_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233291712))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234864640))))[name = string("model_model_layers_19_self_attn_k_proj_weight_palettized")];
tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_19_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234868800))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236441728))))[name = string("model_model_layers_19_self_attn_v_proj_weight_palettized")];
tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_19_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236445888))), lut = tensor<fp16, [1024, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249028864))))[name = string("model_model_layers_19_mlp_gate_proj_weight_palettized")];
tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_19_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249061696))), lut = tensor<fp16, [1024, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261644672))))[name = string("model_model_layers_19_mlp_up_proj_weight_palettized")];
tensor<fp16, [3072, 8192, 1, 1]> model_model_layers_19_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 8192, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261677504))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(274260480))))[name = string("model_model_layers_19_mlp_down_proj_weight_palettized")];
tensor<fp16, [3072, 3072, 1, 1]> model_model_layers_20_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(274272832))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(278991488))))[name = string("model_model_layers_20_self_attn_q_proj_weight_palettized")];
tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_20_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279003840))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280576768))))[name = string("model_model_layers_20_self_attn_k_proj_weight_palettized")];
tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_20_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280580928))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282153856))))[name = string("model_model_layers_20_self_attn_v_proj_weight_palettized")];
tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_20_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282158016))), lut = tensor<fp16, [1024, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294740992))))[name = string("model_model_layers_20_mlp_gate_proj_weight_palettized")];
tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_20_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294773824))), lut = tensor<fp16, [1024, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307356800))))[name = string("model_model_layers_20_mlp_up_proj_weight_palettized")];
tensor<fp16, [3072, 8192, 1, 1]> model_model_layers_20_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 8192, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307389632))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319972608))))[name = string("model_model_layers_20_mlp_down_proj_weight_palettized")];
int32 var_52 = const()[name = string("op_52"), val = int32(-1)];
int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)];
tensor<bool, [1]> greater_equal_0 = greater_equal(x = current_pos, y = greater_equal_0_y_0)[name = string("greater_equal_0")];
int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(131072)];
tensor<int32, [1]> add_0 = add(x = current_pos, y = slice_by_index_0)[name = string("add_0")];
tensor<int32, [1]> select_0 = select(a = current_pos, b = add_0, cond = greater_equal_0)[name = string("select_0")];
int32 var_236_axis_0 = const()[name = string("op_236_axis_0"), val = int32(1)];
int32 var_236_batch_dims_0 = const()[name = string("op_236_batch_dims_0"), val = int32(0)];
bool var_236_validate_indices_0 = const()[name = string("op_236_validate_indices_0"), val = bool(false)];
tensor<fp16, [1, 131072, 128]> var_57_to_fp16 = const()[name = string("op_57_to_fp16"), val = tensor<fp16, [1, 131072, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319984960)))];
tensor<fp16, [1, 1, 128]> var_236_cast_fp16 = gather(axis = var_236_axis_0, batch_dims = var_236_batch_dims_0, indices = select_0, validate_indices = var_236_validate_indices_0, x = var_57_to_fp16)[name = string("op_236_cast_fp16")];
tensor<int32, [4]> var_237 = const()[name = string("op_237"), val = tensor<int32, [4]>([1, 1, 1, -1])];
tensor<fp16, [1, 1, 1, 128]> sin_1_cast_fp16 = reshape(shape = var_237, x = var_236_cast_fp16)[name = string("sin_1_cast_fp16")];
int32 var_241_axis_0 = const()[name = string("op_241_axis_0"), val = int32(1)];
int32 var_241_batch_dims_0 = const()[name = string("op_241_batch_dims_0"), val = int32(0)];
bool var_241_validate_indices_0 = const()[name = string("op_241_validate_indices_0"), val = bool(false)];
tensor<fp16, [1, 131072, 128]> var_51_to_fp16 = const()[name = string("op_51_to_fp16"), val = tensor<fp16, [1, 131072, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353539456)))];
tensor<fp16, [1, 1, 128]> var_241_cast_fp16 = gather(axis = var_241_axis_0, batch_dims = var_241_batch_dims_0, indices = select_0, validate_indices = var_241_validate_indices_0, x = var_51_to_fp16)[name = string("op_241_cast_fp16")];
tensor<int32, [4]> var_242 = const()[name = string("op_242"), val = tensor<int32, [4]>([1, 1, 1, -1])];
tensor<fp16, [1, 1, 1, 128]> cos_1_cast_fp16 = reshape(shape = var_242, x = var_241_cast_fp16)[name = string("cos_1_cast_fp16")];
tensor<int32, [1]> mean_1_axes_0 = const()[name = string("mean_1_axes_0"), val = tensor<int32, [1]>([-1])];
bool mean_1_keep_dims_0 = const()[name = string("mean_1_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 1, 1]> mean_1_cast_fp16 = reduce_mean(axes = mean_1_axes_0, keep_dims = mean_1_keep_dims_0, x = hidden_states)[name = string("mean_1_cast_fp16")];
tensor<fp16, [1, 1, 3072]> input_1_cast_fp16 = sub(x = hidden_states, y = mean_1_cast_fp16)[name = string("input_1_cast_fp16")];
tensor<int32, [1]> var_250_axes_0 = const()[name = string("op_250_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [3072]> model_model_layers_14_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_input_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387093952)))];
fp16 var_47_to_fp16 = const()[name = string("op_47_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 1, 3072]> var_250_cast_fp16 = layer_norm(axes = var_250_axes_0, epsilon = var_47_to_fp16, gamma = model_model_layers_14_input_layernorm_weight_to_fp16, x = input_1_cast_fp16)[name = string("op_250_cast_fp16")];
tensor<int32, [3]> var_253 = const()[name = string("op_253"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [1]> var_255_axes_0 = const()[name = string("op_255_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 1]> var_254 = transpose(perm = var_253, x = var_250_cast_fp16)[name = string("transpose_27")];
tensor<fp16, [1, 3072, 1, 1]> var_255 = expand_dims(axes = var_255_axes_0, x = var_254)[name = string("op_255")];
string var_262_pad_type_0 = const()[name = string("op_262_pad_type_0"), val = string("valid")];
tensor<int32, [2]> var_262_strides_0 = const()[name = string("op_262_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> var_262_pad_0 = const()[name = string("op_262_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> var_262_dilations_0 = const()[name = string("op_262_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 var_262_groups_0 = const()[name = string("op_262_groups_0"), val = int32(1)];
tensor<fp16, [1, 3072, 1, 1]> var_262 = conv(dilations = var_262_dilations_0, groups = var_262_groups_0, pad = var_262_pad_0, pad_type = var_262_pad_type_0, strides = var_262_strides_0, weight = model_model_layers_14_self_attn_q_proj_weight_palettized, x = var_255)[name = string("op_262")];
tensor<int32, [4]> var_263 = const()[name = string("op_263"), val = tensor<int32, [4]>([1, 24, 1, 128])];
tensor<fp16, [1, 24, 1, 128]> var_264 = reshape(shape = var_263, x = var_262)[name = string("op_264")];
string var_271_pad_type_0 = const()[name = string("op_271_pad_type_0"), val = string("valid")];
tensor<int32, [2]> var_271_strides_0 = const()[name = string("op_271_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> var_271_pad_0 = const()[name = string("op_271_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> var_271_dilations_0 = const()[name = string("op_271_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 var_271_groups_0 = const()[name = string("op_271_groups_0"), val = int32(1)];
tensor<fp16, [1, 1024, 1, 1]> var_271 = conv(dilations = var_271_dilations_0, groups = var_271_groups_0, pad = var_271_pad_0, pad_type = var_271_pad_type_0, strides = var_271_strides_0, weight = model_model_layers_14_self_attn_k_proj_weight_palettized, x = var_255)[name = string("op_271")];
tensor<int32, [4]> var_272 = const()[name = string("op_272"), val = tensor<int32, [4]>([1, 8, 1, 128])];
tensor<fp16, [1, 8, 1, 128]> var_273 = reshape(shape = var_272, x = var_271)[name = string("op_273")];
string var_280_pad_type_0 = const()[name = string("op_280_pad_type_0"), val = string("valid")];
tensor<int32, [2]> var_280_strides_0 = const()[name = string("op_280_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> var_280_pad_0 = const()[name = string("op_280_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> var_280_dilations_0 = const()[name = string("op_280_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 var_280_groups_0 = const()[name = string("op_280_groups_0"), val = int32(1)];
tensor<fp16, [1, 1024, 1, 1]> var_280 = conv(dilations = var_280_dilations_0, groups = var_280_groups_0, pad = var_280_pad_0, pad_type = var_280_pad_type_0, strides = var_280_strides_0, weight = model_model_layers_14_self_attn_v_proj_weight_palettized, x = var_255)[name = string("op_280")];
tensor<int32, [4]> var_281 = const()[name = string("op_281"), val = tensor<int32, [4]>([1, 8, 1, 128])];
tensor<fp16, [1, 8, 1, 128]> var_282 = reshape(shape = var_281, x = var_280)[name = string("op_282")];
tensor<int32, [4]> x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor<int32, [4]>([1, 24, 1, 64])];
tensor<bool, [4]> x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 24, 1, 64]> x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = var_264)[name = string("x1_1")];
tensor<int32, [4]> x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor<int32, [4]>([1, 24, 1, 128])];
tensor<bool, [4]> x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 24, 1, 64]> x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = var_264)[name = string("x2_1")];
tensor<int32, [4]> cos_3_begin_0 = const()[name = string("cos_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> cos_3_end_0 = const()[name = string("cos_3_end_0"), val = tensor<int32, [4]>([1, 1, 1, 64])];
tensor<bool, [4]> cos_3_end_mask_0 = const()[name = string("cos_3_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 1, 1, 64]> cos_3_cast_fp16 = slice_by_index(begin = cos_3_begin_0, end = cos_3_end_0, end_mask = cos_3_end_mask_0, x = cos_1_cast_fp16)[name = string("cos_3_cast_fp16")];
tensor<int32, [4]> sin_3_begin_0 = const()[name = string("sin_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> sin_3_end_0 = const()[name = string("sin_3_end_0"), val = tensor<int32, [4]>([1, 1, 1, 64])];
tensor<bool, [4]> sin_3_end_mask_0 = const()[name = string("sin_3_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 1, 1, 64]> sin_3_cast_fp16 = slice_by_index(begin = sin_3_begin_0, end = sin_3_end_0, end_mask = sin_3_end_mask_0, x = sin_1_cast_fp16)[name = string("sin_3_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_296_cast_fp16 = mul(x = x1_1, y = cos_3_cast_fp16)[name = string("op_296_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_297_cast_fp16 = mul(x = x2_1, y = sin_3_cast_fp16)[name = string("op_297_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_298_cast_fp16 = sub(x = var_296_cast_fp16, y = var_297_cast_fp16)[name = string("op_298_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_299_cast_fp16 = mul(x = x2_1, y = cos_3_cast_fp16)[name = string("op_299_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_300_cast_fp16 = mul(x = x1_1, y = sin_3_cast_fp16)[name = string("op_300_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_301_cast_fp16 = add(x = var_299_cast_fp16, y = var_300_cast_fp16)[name = string("op_301_cast_fp16")];
bool rotated_1_interleave_0 = const()[name = string("rotated_1_interleave_0"), val = bool(false)];
tensor<fp16, [1, 24, 1, 128]> rotated_1_cast_fp16 = concat(axis = var_52, interleave = rotated_1_interleave_0, values = (var_298_cast_fp16, var_301_cast_fp16))[name = string("rotated_1_cast_fp16")];
tensor<int32, [4]> x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor<int32, [4]>([1, 8, 1, 64])];
tensor<bool, [4]> x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 8, 1, 64]> x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = var_273)[name = string("x1_3")];
tensor<int32, [4]> x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor<int32, [4]>([1, 8, 1, 128])];
tensor<bool, [4]> x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 8, 1, 64]> x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = var_273)[name = string("x2_3")];
tensor<fp16, [1, 8, 1, 64]> var_317_cast_fp16 = mul(x = x1_3, y = cos_3_cast_fp16)[name = string("op_317_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_318_cast_fp16 = mul(x = x2_3, y = sin_3_cast_fp16)[name = string("op_318_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_319_cast_fp16 = sub(x = var_317_cast_fp16, y = var_318_cast_fp16)[name = string("op_319_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_320_cast_fp16 = mul(x = x2_3, y = cos_3_cast_fp16)[name = string("op_320_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_321_cast_fp16 = mul(x = x1_3, y = sin_3_cast_fp16)[name = string("op_321_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_322_cast_fp16 = add(x = var_320_cast_fp16, y = var_321_cast_fp16)[name = string("op_322_cast_fp16")];
bool rotated_3_interleave_0 = const()[name = string("rotated_3_interleave_0"), val = bool(false)];
tensor<fp16, [1, 8, 1, 128]> rotated_3_cast_fp16 = concat(axis = var_52, interleave = rotated_3_interleave_0, values = (var_319_cast_fp16, var_322_cast_fp16))[name = string("rotated_3_cast_fp16")];
int32 var_326 = const()[name = string("op_326"), val = int32(1)];
tensor<int32, [1]> var_327 = add(x = current_pos, y = var_326)[name = string("op_327")];
tensor<fp16, [56, 8, 1024, 128]> read_state_0 = read_state(input = model_model_kv_cache_0)[name = string("read_state_0")];
tensor<int32, [1]> expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor<int32, [1]>([14])];
tensor<int32, [1]> expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor<int32, [1]>([15])];
int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)];
bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")];
tensor<int32, [1]> concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor<int32, [1]>([0])];
int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)];
bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_327, concat_3_values3_0))[name = string("concat_3")];
tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_1_stride_0, update = rotated_3_cast_fp16, x = read_state_0)[name = string("model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16")];
write_state(data = model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_0_write_state")];
tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_14 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_0")];
tensor<int32, [1]> expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor<int32, [1]>([42])];
tensor<int32, [1]> expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor<int32, [1]>([43])];
int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)];
bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")];
tensor<int32, [1]> concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor<int32, [1]>([0])];
int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)];
bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_327, concat_7_values3_0))[name = string("concat_7")];
tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_2_stride_0, update = var_282, x = coreml_update_state_14)[name = string("model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16")];
write_state(data = model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_1_write_state")];
tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_15 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_1")];
tensor<int32, [4]> var_342_begin_0 = const()[name = string("op_342_begin_0"), val = tensor<int32, [4]>([14, 0, 0, 0])];
tensor<int32, [4]> var_342_end_0 = const()[name = string("op_342_end_0"), val = tensor<int32, [4]>([15, 8, 1024, 128])];
tensor<bool, [4]> var_342_end_mask_0 = const()[name = string("op_342_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<fp16, [1, 8, 1024, 128]> var_342_cast_fp16 = slice_by_index(begin = var_342_begin_0, end = var_342_end_0, end_mask = var_342_end_mask_0, x = coreml_update_state_15)[name = string("op_342_cast_fp16")];
tensor<int32, [1]> K_layer_cache_1_axes_0 = const()[name = string("K_layer_cache_1_axes_0"), val = tensor<int32, [1]>([0])];
tensor<fp16, [8, 1024, 128]> K_layer_cache_1_cast_fp16 = squeeze(axes = K_layer_cache_1_axes_0, x = var_342_cast_fp16)[name = string("K_layer_cache_1_cast_fp16")];
tensor<int32, [4]> var_344_begin_0 = const()[name = string("op_344_begin_0"), val = tensor<int32, [4]>([42, 0, 0, 0])];
tensor<int32, [4]> var_344_end_0 = const()[name = string("op_344_end_0"), val = tensor<int32, [4]>([43, 8, 1024, 128])];
tensor<bool, [4]> var_344_end_mask_0 = const()[name = string("op_344_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<fp16, [1, 8, 1024, 128]> var_344_cast_fp16 = slice_by_index(begin = var_344_begin_0, end = var_344_end_0, end_mask = var_344_end_mask_0, x = coreml_update_state_15)[name = string("op_344_cast_fp16")];
tensor<int32, [1]> V_layer_cache_1_axes_0 = const()[name = string("V_layer_cache_1_axes_0"), val = tensor<int32, [1]>([0])];
tensor<fp16, [8, 1024, 128]> V_layer_cache_1_cast_fp16 = squeeze(axes = V_layer_cache_1_axes_0, x = var_344_cast_fp16)[name = string("V_layer_cache_1_cast_fp16")];
tensor<int32, [1]> x_11_axes_0 = const()[name = string("x_11_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [8, 1, 1024, 128]> x_11_cast_fp16 = expand_dims(axes = x_11_axes_0, x = K_layer_cache_1_cast_fp16)[name = string("x_11_cast_fp16")];
tensor<int32, [4]> var_353 = const()[name = string("op_353"), val = tensor<int32, [4]>([1, 3, 1, 1])];
tensor<fp16, [8, 3, 1024, 128]> x_13_cast_fp16 = tile(reps = var_353, x = x_11_cast_fp16)[name = string("x_13_cast_fp16")];
tensor<int32, [4]> var_357 = const()[name = string("op_357"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
tensor<fp16, [1, 24, 1024, 128]> key_states_3_cast_fp16 = reshape(shape = var_357, x = x_13_cast_fp16)[name = string("key_states_3_cast_fp16")];
tensor<int32, [1]> x_17_axes_0 = const()[name = string("x_17_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [8, 1, 1024, 128]> x_17_cast_fp16 = expand_dims(axes = x_17_axes_0, x = V_layer_cache_1_cast_fp16)[name = string("x_17_cast_fp16")];
tensor<int32, [4]> var_360 = const()[name = string("op_360"), val = tensor<int32, [4]>([1, 3, 1, 1])];
tensor<fp16, [8, 3, 1024, 128]> x_19_cast_fp16 = tile(reps = var_360, x = x_17_cast_fp16)[name = string("x_19_cast_fp16")];
tensor<int32, [4]> var_364 = const()[name = string("op_364"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
tensor<fp16, [1, 24, 1024, 128]> value_states_3_cast_fp16 = reshape(shape = var_364, x = x_19_cast_fp16)[name = string("value_states_3_cast_fp16")];
bool var_367_transpose_x_1 = const()[name = string("op_367_transpose_x_1"), val = bool(false)];
bool var_367_transpose_y_1 = const()[name = string("op_367_transpose_y_1"), val = bool(true)];
tensor<fp16, [1, 24, 1, 1024]> var_367_cast_fp16 = matmul(transpose_x = var_367_transpose_x_1, transpose_y = var_367_transpose_y_1, x = rotated_1_cast_fp16, y = key_states_3_cast_fp16)[name = string("op_367_cast_fp16")];
fp16 var_368_to_fp16 = const()[name = string("op_368_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 24, 1, 1024]> attn_weights_1_cast_fp16 = mul(x = var_367_cast_fp16, y = var_368_to_fp16)[name = string("attn_weights_1_cast_fp16")];
tensor<fp16, [1, 24, 1, 1024]> x_21_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("x_21_cast_fp16")];
tensor<int32, [1]> reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor<int32, [1]>([-1])];
bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 24, 1, 1]> reduce_max_0_cast_fp16 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_21_cast_fp16)[name = string("reduce_max_0_cast_fp16")];
tensor<fp16, [1, 24, 1, 1024]> x_23_cast_fp16 = sub(x = x_21_cast_fp16, y = reduce_max_0_cast_fp16)[name = string("x_23_cast_fp16")];
tensor<fp16, [1, 24, 1, 1024]> exp_x_1_cast_fp16 = exp(x = x_23_cast_fp16)[name = string("exp_x_1_cast_fp16")];
tensor<int32, [1]> var_379_axes_0 = const()[name = string("op_379_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_379_keep_dims_0 = const()[name = string("op_379_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 24, 1, 1]> var_379_cast_fp16 = reduce_sum(axes = var_379_axes_0, keep_dims = var_379_keep_dims_0, x = exp_x_1_cast_fp16)[name = string("op_379_cast_fp16")];
tensor<fp16, [1, 24, 1, 1024]> attn_weights_3_cast_fp16 = real_div(x = exp_x_1_cast_fp16, y = var_379_cast_fp16)[name = string("attn_weights_3_cast_fp16")];
bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)];
bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 24, 1, 128]> attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = attn_weights_3_cast_fp16, y = value_states_3_cast_fp16)[name = string("attn_output_1_cast_fp16")];
tensor<int32, [4]> var_382_perm_0 = const()[name = string("op_382_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_384 = const()[name = string("op_384"), val = tensor<int32, [3]>([1, 1, 3072])];
tensor<fp16, [1, 1, 24, 128]> var_382_cast_fp16 = transpose(perm = var_382_perm_0, x = attn_output_1_cast_fp16)[name = string("transpose_26")];
tensor<fp16, [1, 1, 3072]> input_5_cast_fp16 = reshape(shape = var_384, x = var_382_cast_fp16)[name = string("input_5_cast_fp16")];
tensor<fp16, [3072, 3072]> model_model_layers_14_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387100160))), lut = tensor<fp16, [384, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391818816))))[name = string("model_model_layers_14_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
tensor<fp16, [3072]> linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391831168)))];
tensor<fp16, [1, 1, 3072]> linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_14_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_5_cast_fp16)[name = string("linear_0_cast_fp16")];
tensor<fp16, [1, 1, 3072]> hidden_states_5_cast_fp16 = add(x = hidden_states, y = linear_0_cast_fp16)[name = string("hidden_states_5_cast_fp16")];
tensor<int32, [1]> mean_3_axes_0 = const()[name = string("mean_3_axes_0"), val = tensor<int32, [1]>([-1])];
bool mean_3_keep_dims_0 = const()[name = string("mean_3_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 1, 1]> mean_3_cast_fp16 = reduce_mean(axes = mean_3_axes_0, keep_dims = mean_3_keep_dims_0, x = hidden_states_5_cast_fp16)[name = string("mean_3_cast_fp16")];
tensor<fp16, [1, 1, 3072]> input_7_cast_fp16 = sub(x = hidden_states_5_cast_fp16, y = mean_3_cast_fp16)[name = string("input_7_cast_fp16")];
tensor<int32, [1]> var_395_axes_0 = const()[name = string("op_395_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [3072]> model_model_layers_14_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391837376)))];
tensor<fp16, [1, 1, 3072]> var_395_cast_fp16 = layer_norm(axes = var_395_axes_0, epsilon = var_47_to_fp16, gamma = model_model_layers_14_post_attention_layernorm_weight_to_fp16, x = input_7_cast_fp16)[name = string("op_395_cast_fp16")];
tensor<int32, [3]> var_402 = const()[name = string("op_402"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [1]> input_9_axes_0 = const()[name = string("input_9_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 1]> var_403 = transpose(perm = var_402, x = var_395_cast_fp16)[name = string("transpose_25")];
tensor<fp16, [1, 3072, 1, 1]> input_9 = expand_dims(axes = input_9_axes_0, x = var_403)[name = string("input_9")];
string input_11_pad_type_0 = const()[name = string("input_11_pad_type_0"), val = string("valid")];
tensor<int32, [2]> input_11_strides_0 = const()[name = string("input_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> input_11_pad_0 = const()[name = string("input_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> input_11_dilations_0 = const()[name = string("input_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 input_11_groups_0 = const()[name = string("input_11_groups_0"), val = int32(1)];
tensor<fp16, [1, 8192, 1, 1]> input_11 = conv(dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = model_model_layers_14_mlp_gate_proj_weight_palettized, x = input_9)[name = string("input_11")];
string up_states_1_pad_type_0 = const()[name = string("up_states_1_pad_type_0"), val = string("valid")];
tensor<int32, [2]> up_states_1_strides_0 = const()[name = string("up_states_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> up_states_1_pad_0 = const()[name = string("up_states_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> up_states_1_dilations_0 = const()[name = string("up_states_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 up_states_1_groups_0 = const()[name = string("up_states_1_groups_0"), val = int32(1)];
tensor<fp16, [1, 8192, 1, 1]> up_states_1 = conv(dilations = up_states_1_dilations_0, groups = up_states_1_groups_0, pad = up_states_1_pad_0, pad_type = up_states_1_pad_type_0, strides = up_states_1_strides_0, weight = model_model_layers_14_mlp_up_proj_weight_palettized, x = input_9)[name = string("up_states_1")];
tensor<fp16, [1, 8192, 1, 1]> gate_states_1 = silu(x = input_11)[name = string("gate_states_1")];
tensor<fp16, [1, 8192, 1, 1]> input_13 = mul(x = gate_states_1, y = up_states_1)[name = string("input_13")];
string hidden_states_7_pad_type_0 = const()[name = string("hidden_states_7_pad_type_0"), val = string("valid")];
tensor<int32, [2]> hidden_states_7_strides_0 = const()[name = string("hidden_states_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> hidden_states_7_pad_0 = const()[name = string("hidden_states_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> hidden_states_7_dilations_0 = const()[name = string("hidden_states_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 hidden_states_7_groups_0 = const()[name = string("hidden_states_7_groups_0"), val = int32(1)];
tensor<fp16, [1, 3072, 1, 1]> hidden_states_7 = conv(dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = model_model_layers_14_mlp_down_proj_weight_palettized, x = input_13)[name = string("hidden_states_7")];
tensor<int32, [1]> var_425_axes_0 = const()[name = string("op_425_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 1]> var_425 = squeeze(axes = var_425_axes_0, x = hidden_states_7)[name = string("op_425")];
tensor<int32, [3]> var_426 = const()[name = string("op_426"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 1, 3072]> var_427 = transpose(perm = var_426, x = var_425)[name = string("transpose_24")];
tensor<fp16, [1, 1, 3072]> hidden_states_9_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = var_427)[name = string("hidden_states_9_cast_fp16")];
tensor<int32, [1]> mean_5_axes_0 = const()[name = string("mean_5_axes_0"), val = tensor<int32, [1]>([-1])];
bool mean_5_keep_dims_0 = const()[name = string("mean_5_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 1, 1]> mean_5_cast_fp16 = reduce_mean(axes = mean_5_axes_0, keep_dims = mean_5_keep_dims_0, x = hidden_states_9_cast_fp16)[name = string("mean_5_cast_fp16")];
tensor<fp16, [1, 1, 3072]> input_15_cast_fp16 = sub(x = hidden_states_9_cast_fp16, y = mean_5_cast_fp16)[name = string("input_15_cast_fp16")];
tensor<int32, [1]> var_435_axes_0 = const()[name = string("op_435_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [3072]> model_model_layers_15_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_input_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391843584)))];
tensor<fp16, [1, 1, 3072]> var_435_cast_fp16 = layer_norm(axes = var_435_axes_0, epsilon = var_47_to_fp16, gamma = model_model_layers_15_input_layernorm_weight_to_fp16, x = input_15_cast_fp16)[name = string("op_435_cast_fp16")];
tensor<int32, [3]> var_438 = const()[name = string("op_438"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [1]> var_440_axes_0 = const()[name = string("op_440_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 1]> var_439 = transpose(perm = var_438, x = var_435_cast_fp16)[name = string("transpose_23")];
tensor<fp16, [1, 3072, 1, 1]> var_440 = expand_dims(axes = var_440_axes_0, x = var_439)[name = string("op_440")];
string var_447_pad_type_0 = const()[name = string("op_447_pad_type_0"), val = string("valid")];
tensor<int32, [2]> var_447_strides_0 = const()[name = string("op_447_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> var_447_pad_0 = const()[name = string("op_447_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> var_447_dilations_0 = const()[name = string("op_447_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 var_447_groups_0 = const()[name = string("op_447_groups_0"), val = int32(1)];
tensor<fp16, [1, 3072, 1, 1]> var_447 = conv(dilations = var_447_dilations_0, groups = var_447_groups_0, pad = var_447_pad_0, pad_type = var_447_pad_type_0, strides = var_447_strides_0, weight = model_model_layers_15_self_attn_q_proj_weight_palettized, x = var_440)[name = string("op_447")];
tensor<int32, [4]> var_448 = const()[name = string("op_448"), val = tensor<int32, [4]>([1, 24, 1, 128])];
tensor<fp16, [1, 24, 1, 128]> var_449 = reshape(shape = var_448, x = var_447)[name = string("op_449")];
string var_456_pad_type_0 = const()[name = string("op_456_pad_type_0"), val = string("valid")];
tensor<int32, [2]> var_456_strides_0 = const()[name = string("op_456_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> var_456_pad_0 = const()[name = string("op_456_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> var_456_dilations_0 = const()[name = string("op_456_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 var_456_groups_0 = const()[name = string("op_456_groups_0"), val = int32(1)];
tensor<fp16, [1, 1024, 1, 1]> var_456 = conv(dilations = var_456_dilations_0, groups = var_456_groups_0, pad = var_456_pad_0, pad_type = var_456_pad_type_0, strides = var_456_strides_0, weight = model_model_layers_15_self_attn_k_proj_weight_palettized, x = var_440)[name = string("op_456")];
tensor<int32, [4]> var_457 = const()[name = string("op_457"), val = tensor<int32, [4]>([1, 8, 1, 128])];
tensor<fp16, [1, 8, 1, 128]> var_458 = reshape(shape = var_457, x = var_456)[name = string("op_458")];
string var_465_pad_type_0 = const()[name = string("op_465_pad_type_0"), val = string("valid")];
tensor<int32, [2]> var_465_strides_0 = const()[name = string("op_465_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> var_465_pad_0 = const()[name = string("op_465_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> var_465_dilations_0 = const()[name = string("op_465_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 var_465_groups_0 = const()[name = string("op_465_groups_0"), val = int32(1)];
tensor<fp16, [1, 1024, 1, 1]> var_465 = conv(dilations = var_465_dilations_0, groups = var_465_groups_0, pad = var_465_pad_0, pad_type = var_465_pad_type_0, strides = var_465_strides_0, weight = model_model_layers_15_self_attn_v_proj_weight_palettized, x = var_440)[name = string("op_465")];
tensor<int32, [4]> var_466 = const()[name = string("op_466"), val = tensor<int32, [4]>([1, 8, 1, 128])];
tensor<fp16, [1, 8, 1, 128]> var_467 = reshape(shape = var_466, x = var_465)[name = string("op_467")];
tensor<int32, [4]> x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor<int32, [4]>([1, 24, 1, 64])];
tensor<bool, [4]> x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 24, 1, 64]> x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = var_449)[name = string("x1_5")];
tensor<int32, [4]> x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor<int32, [4]>([1, 24, 1, 128])];
tensor<bool, [4]> x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 24, 1, 64]> x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = var_449)[name = string("x2_5")];
tensor<fp16, [1, 24, 1, 64]> var_481_cast_fp16 = mul(x = x1_5, y = cos_3_cast_fp16)[name = string("op_481_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_482_cast_fp16 = mul(x = x2_5, y = sin_3_cast_fp16)[name = string("op_482_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_483_cast_fp16 = sub(x = var_481_cast_fp16, y = var_482_cast_fp16)[name = string("op_483_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_484_cast_fp16 = mul(x = x2_5, y = cos_3_cast_fp16)[name = string("op_484_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_485_cast_fp16 = mul(x = x1_5, y = sin_3_cast_fp16)[name = string("op_485_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_486_cast_fp16 = add(x = var_484_cast_fp16, y = var_485_cast_fp16)[name = string("op_486_cast_fp16")];
bool rotated_5_interleave_0 = const()[name = string("rotated_5_interleave_0"), val = bool(false)];
tensor<fp16, [1, 24, 1, 128]> rotated_5_cast_fp16 = concat(axis = var_52, interleave = rotated_5_interleave_0, values = (var_483_cast_fp16, var_486_cast_fp16))[name = string("rotated_5_cast_fp16")];
tensor<int32, [4]> x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor<int32, [4]>([1, 8, 1, 64])];
tensor<bool, [4]> x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 8, 1, 64]> x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = var_458)[name = string("x1_7")];
tensor<int32, [4]> x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor<int32, [4]>([1, 8, 1, 128])];
tensor<bool, [4]> x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 8, 1, 64]> x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = var_458)[name = string("x2_7")];
tensor<fp16, [1, 8, 1, 64]> var_502_cast_fp16 = mul(x = x1_7, y = cos_3_cast_fp16)[name = string("op_502_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_503_cast_fp16 = mul(x = x2_7, y = sin_3_cast_fp16)[name = string("op_503_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_504_cast_fp16 = sub(x = var_502_cast_fp16, y = var_503_cast_fp16)[name = string("op_504_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_505_cast_fp16 = mul(x = x2_7, y = cos_3_cast_fp16)[name = string("op_505_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_506_cast_fp16 = mul(x = x1_7, y = sin_3_cast_fp16)[name = string("op_506_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_507_cast_fp16 = add(x = var_505_cast_fp16, y = var_506_cast_fp16)[name = string("op_507_cast_fp16")];
bool rotated_7_interleave_0 = const()[name = string("rotated_7_interleave_0"), val = bool(false)];
tensor<fp16, [1, 8, 1, 128]> rotated_7_cast_fp16 = concat(axis = var_52, interleave = rotated_7_interleave_0, values = (var_504_cast_fp16, var_507_cast_fp16))[name = string("rotated_7_cast_fp16")];
tensor<int32, [1]> expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor<int32, [1]>([15])];
tensor<int32, [1]> expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor<int32, [1]>([16])];
int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)];
bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_10")];
tensor<int32, [1]> concat_11_values1_0 = const()[name = string("concat_11_values1_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = tensor<int32, [1]>([0])];
int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)];
bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (expand_dims_16, concat_11_values1_0, var_327, concat_11_values3_0))[name = string("concat_11")];
tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_10, begin_mask = model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_11, end_mask = model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_3_stride_0, update = rotated_7_cast_fp16, x = coreml_update_state_15)[name = string("model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16")];
write_state(data = model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_2_write_state")];
tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_16 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_2")];
tensor<int32, [1]> expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor<int32, [1]>([43])];
tensor<int32, [1]> expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor<int32, [1]>([44])];
int32 concat_14_axis_0 = const()[name = string("concat_14_axis_0"), val = int32(0)];
bool concat_14_interleave_0 = const()[name = string("concat_14_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_14 = concat(axis = concat_14_axis_0, interleave = concat_14_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_14")];
tensor<int32, [1]> concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_15_values3_0 = const()[name = string("concat_15_values3_0"), val = tensor<int32, [1]>([0])];
int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)];
bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (expand_dims_22, concat_15_values1_0, var_327, concat_15_values3_0))[name = string("concat_15")];
tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_14, begin_mask = model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_15, end_mask = model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_4_stride_0, update = var_467, x = coreml_update_state_16)[name = string("model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16")];
write_state(data = model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_3_write_state")];
tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_17 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_3")];
tensor<int32, [4]> var_527_begin_0 = const()[name = string("op_527_begin_0"), val = tensor<int32, [4]>([15, 0, 0, 0])];
tensor<int32, [4]> var_527_end_0 = const()[name = string("op_527_end_0"), val = tensor<int32, [4]>([16, 8, 1024, 128])];
tensor<bool, [4]> var_527_end_mask_0 = const()[name = string("op_527_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<fp16, [1, 8, 1024, 128]> var_527_cast_fp16 = slice_by_index(begin = var_527_begin_0, end = var_527_end_0, end_mask = var_527_end_mask_0, x = coreml_update_state_17)[name = string("op_527_cast_fp16")];
tensor<int32, [1]> K_layer_cache_3_axes_0 = const()[name = string("K_layer_cache_3_axes_0"), val = tensor<int32, [1]>([0])];
tensor<fp16, [8, 1024, 128]> K_layer_cache_3_cast_fp16 = squeeze(axes = K_layer_cache_3_axes_0, x = var_527_cast_fp16)[name = string("K_layer_cache_3_cast_fp16")];
tensor<int32, [4]> var_529_begin_0 = const()[name = string("op_529_begin_0"), val = tensor<int32, [4]>([43, 0, 0, 0])];
tensor<int32, [4]> var_529_end_0 = const()[name = string("op_529_end_0"), val = tensor<int32, [4]>([44, 8, 1024, 128])];
tensor<bool, [4]> var_529_end_mask_0 = const()[name = string("op_529_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<fp16, [1, 8, 1024, 128]> var_529_cast_fp16 = slice_by_index(begin = var_529_begin_0, end = var_529_end_0, end_mask = var_529_end_mask_0, x = coreml_update_state_17)[name = string("op_529_cast_fp16")];
tensor<int32, [1]> V_layer_cache_3_axes_0 = const()[name = string("V_layer_cache_3_axes_0"), val = tensor<int32, [1]>([0])];
tensor<fp16, [8, 1024, 128]> V_layer_cache_3_cast_fp16 = squeeze(axes = V_layer_cache_3_axes_0, x = var_529_cast_fp16)[name = string("V_layer_cache_3_cast_fp16")];
tensor<int32, [1]> x_39_axes_0 = const()[name = string("x_39_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [8, 1, 1024, 128]> x_39_cast_fp16 = expand_dims(axes = x_39_axes_0, x = K_layer_cache_3_cast_fp16)[name = string("x_39_cast_fp16")];
tensor<int32, [4]> var_538 = const()[name = string("op_538"), val = tensor<int32, [4]>([1, 3, 1, 1])];
tensor<fp16, [8, 3, 1024, 128]> x_41_cast_fp16 = tile(reps = var_538, x = x_39_cast_fp16)[name = string("x_41_cast_fp16")];
tensor<int32, [4]> var_542 = const()[name = string("op_542"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
tensor<fp16, [1, 24, 1024, 128]> key_states_7_cast_fp16 = reshape(shape = var_542, x = x_41_cast_fp16)[name = string("key_states_7_cast_fp16")];
tensor<int32, [1]> x_45_axes_0 = const()[name = string("x_45_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [8, 1, 1024, 128]> x_45_cast_fp16 = expand_dims(axes = x_45_axes_0, x = V_layer_cache_3_cast_fp16)[name = string("x_45_cast_fp16")];
tensor<int32, [4]> var_545 = const()[name = string("op_545"), val = tensor<int32, [4]>([1, 3, 1, 1])];
tensor<fp16, [8, 3, 1024, 128]> x_47_cast_fp16 = tile(reps = var_545, x = x_45_cast_fp16)[name = string("x_47_cast_fp16")];
tensor<int32, [4]> var_549 = const()[name = string("op_549"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
tensor<fp16, [1, 24, 1024, 128]> value_states_7_cast_fp16 = reshape(shape = var_549, x = x_47_cast_fp16)[name = string("value_states_7_cast_fp16")];
bool var_552_transpose_x_1 = const()[name = string("op_552_transpose_x_1"), val = bool(false)];
bool var_552_transpose_y_1 = const()[name = string("op_552_transpose_y_1"), val = bool(true)];
tensor<fp16, [1, 24, 1, 1024]> var_552_cast_fp16 = matmul(transpose_x = var_552_transpose_x_1, transpose_y = var_552_transpose_y_1, x = rotated_5_cast_fp16, y = key_states_7_cast_fp16)[name = string("op_552_cast_fp16")];
fp16 var_553_to_fp16 = const()[name = string("op_553_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 24, 1, 1024]> attn_weights_5_cast_fp16 = mul(x = var_552_cast_fp16, y = var_553_to_fp16)[name = string("attn_weights_5_cast_fp16")];
tensor<fp16, [1, 24, 1, 1024]> x_49_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask)[name = string("x_49_cast_fp16")];
tensor<int32, [1]> reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor<int32, [1]>([-1])];
bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 24, 1, 1]> reduce_max_1_cast_fp16 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_49_cast_fp16)[name = string("reduce_max_1_cast_fp16")];
tensor<fp16, [1, 24, 1, 1024]> x_51_cast_fp16 = sub(x = x_49_cast_fp16, y = reduce_max_1_cast_fp16)[name = string("x_51_cast_fp16")];
tensor<fp16, [1, 24, 1, 1024]> exp_x_3_cast_fp16 = exp(x = x_51_cast_fp16)[name = string("exp_x_3_cast_fp16")];
tensor<int32, [1]> var_564_axes_0 = const()[name = string("op_564_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_564_keep_dims_0 = const()[name = string("op_564_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 24, 1, 1]> var_564_cast_fp16 = reduce_sum(axes = var_564_axes_0, keep_dims = var_564_keep_dims_0, x = exp_x_3_cast_fp16)[name = string("op_564_cast_fp16")];
tensor<fp16, [1, 24, 1, 1024]> attn_weights_7_cast_fp16 = real_div(x = exp_x_3_cast_fp16, y = var_564_cast_fp16)[name = string("attn_weights_7_cast_fp16")];
bool attn_output_7_transpose_x_0 = const()[name = string("attn_output_7_transpose_x_0"), val = bool(false)];
bool attn_output_7_transpose_y_0 = const()[name = string("attn_output_7_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 24, 1, 128]> attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_0, transpose_y = attn_output_7_transpose_y_0, x = attn_weights_7_cast_fp16, y = value_states_7_cast_fp16)[name = string("attn_output_7_cast_fp16")];
tensor<int32, [4]> var_567_perm_0 = const()[name = string("op_567_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_569 = const()[name = string("op_569"), val = tensor<int32, [3]>([1, 1, 3072])];
tensor<fp16, [1, 1, 24, 128]> var_567_cast_fp16 = transpose(perm = var_567_perm_0, x = attn_output_7_cast_fp16)[name = string("transpose_22")];
tensor<fp16, [1, 1, 3072]> input_19_cast_fp16 = reshape(shape = var_569, x = var_567_cast_fp16)[name = string("input_19_cast_fp16")];
tensor<fp16, [3072, 3072]> model_model_layers_15_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391849792))), lut = tensor<fp16, [384, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396568448))))[name = string("model_model_layers_15_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
tensor<fp16, [1, 1, 3072]> linear_1_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_15_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_19_cast_fp16)[name = string("linear_1_cast_fp16")];
tensor<fp16, [1, 1, 3072]> hidden_states_13_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = linear_1_cast_fp16)[name = string("hidden_states_13_cast_fp16")];
tensor<int32, [1]> mean_7_axes_0 = const()[name = string("mean_7_axes_0"), val = tensor<int32, [1]>([-1])];
bool mean_7_keep_dims_0 = const()[name = string("mean_7_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 1, 1]> mean_7_cast_fp16 = reduce_mean(axes = mean_7_axes_0, keep_dims = mean_7_keep_dims_0, x = hidden_states_13_cast_fp16)[name = string("mean_7_cast_fp16")];
tensor<fp16, [1, 1, 3072]> input_21_cast_fp16 = sub(x = hidden_states_13_cast_fp16, y = mean_7_cast_fp16)[name = string("input_21_cast_fp16")];
tensor<int32, [1]> var_580_axes_0 = const()[name = string("op_580_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [3072]> model_model_layers_15_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396580800)))];
tensor<fp16, [1, 1, 3072]> var_580_cast_fp16 = layer_norm(axes = var_580_axes_0, epsilon = var_47_to_fp16, gamma = model_model_layers_15_post_attention_layernorm_weight_to_fp16, x = input_21_cast_fp16)[name = string("op_580_cast_fp16")];
tensor<int32, [3]> var_587 = const()[name = string("op_587"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [1]> input_23_axes_0 = const()[name = string("input_23_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 1]> var_588 = transpose(perm = var_587, x = var_580_cast_fp16)[name = string("transpose_21")];
tensor<fp16, [1, 3072, 1, 1]> input_23 = expand_dims(axes = input_23_axes_0, x = var_588)[name = string("input_23")];
string input_25_pad_type_0 = const()[name = string("input_25_pad_type_0"), val = string("valid")];
tensor<int32, [2]> input_25_strides_0 = const()[name = string("input_25_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> input_25_pad_0 = const()[name = string("input_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> input_25_dilations_0 = const()[name = string("input_25_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 input_25_groups_0 = const()[name = string("input_25_groups_0"), val = int32(1)];
tensor<fp16, [1, 8192, 1, 1]> input_25 = conv(dilations = input_25_dilations_0, groups = input_25_groups_0, pad = input_25_pad_0, pad_type = input_25_pad_type_0, strides = input_25_strides_0, weight = model_model_layers_15_mlp_gate_proj_weight_palettized, x = input_23)[name = string("input_25")];
string up_states_3_pad_type_0 = const()[name = string("up_states_3_pad_type_0"), val = string("valid")];
tensor<int32, [2]> up_states_3_strides_0 = const()[name = string("up_states_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> up_states_3_pad_0 = const()[name = string("up_states_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> up_states_3_dilations_0 = const()[name = string("up_states_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 up_states_3_groups_0 = const()[name = string("up_states_3_groups_0"), val = int32(1)];
tensor<fp16, [1, 8192, 1, 1]> up_states_3 = conv(dilations = up_states_3_dilations_0, groups = up_states_3_groups_0, pad = up_states_3_pad_0, pad_type = up_states_3_pad_type_0, strides = up_states_3_strides_0, weight = model_model_layers_15_mlp_up_proj_weight_palettized, x = input_23)[name = string("up_states_3")];
tensor<fp16, [1, 8192, 1, 1]> gate_states_3 = silu(x = input_25)[name = string("gate_states_3")];
tensor<fp16, [1, 8192, 1, 1]> input_27 = mul(x = gate_states_3, y = up_states_3)[name = string("input_27")];
string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")];
tensor<int32, [2]> hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)];
tensor<fp16, [1, 3072, 1, 1]> hidden_states_15 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = model_model_layers_15_mlp_down_proj_weight_palettized, x = input_27)[name = string("hidden_states_15")];
tensor<int32, [1]> var_610_axes_0 = const()[name = string("op_610_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 1]> var_610 = squeeze(axes = var_610_axes_0, x = hidden_states_15)[name = string("op_610")];
tensor<int32, [3]> var_611 = const()[name = string("op_611"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 1, 3072]> var_612 = transpose(perm = var_611, x = var_610)[name = string("transpose_20")];
tensor<fp16, [1, 1, 3072]> hidden_states_17_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = var_612)[name = string("hidden_states_17_cast_fp16")];
tensor<int32, [1]> mean_9_axes_0 = const()[name = string("mean_9_axes_0"), val = tensor<int32, [1]>([-1])];
bool mean_9_keep_dims_0 = const()[name = string("mean_9_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 1, 1]> mean_9_cast_fp16 = reduce_mean(axes = mean_9_axes_0, keep_dims = mean_9_keep_dims_0, x = hidden_states_17_cast_fp16)[name = string("mean_9_cast_fp16")];
tensor<fp16, [1, 1, 3072]> input_29_cast_fp16 = sub(x = hidden_states_17_cast_fp16, y = mean_9_cast_fp16)[name = string("input_29_cast_fp16")];
tensor<int32, [1]> var_620_axes_0 = const()[name = string("op_620_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [3072]> model_model_layers_16_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_16_input_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396587008)))];
tensor<fp16, [1, 1, 3072]> var_620_cast_fp16 = layer_norm(axes = var_620_axes_0, epsilon = var_47_to_fp16, gamma = model_model_layers_16_input_layernorm_weight_to_fp16, x = input_29_cast_fp16)[name = string("op_620_cast_fp16")];
tensor<int32, [3]> var_623 = const()[name = string("op_623"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [1]> var_625_axes_0 = const()[name = string("op_625_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 1]> var_624 = transpose(perm = var_623, x = var_620_cast_fp16)[name = string("transpose_19")];
tensor<fp16, [1, 3072, 1, 1]> var_625 = expand_dims(axes = var_625_axes_0, x = var_624)[name = string("op_625")];
string var_632_pad_type_0 = const()[name = string("op_632_pad_type_0"), val = string("valid")];
tensor<int32, [2]> var_632_strides_0 = const()[name = string("op_632_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> var_632_pad_0 = const()[name = string("op_632_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> var_632_dilations_0 = const()[name = string("op_632_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 var_632_groups_0 = const()[name = string("op_632_groups_0"), val = int32(1)];
tensor<fp16, [1, 3072, 1, 1]> var_632 = conv(dilations = var_632_dilations_0, groups = var_632_groups_0, pad = var_632_pad_0, pad_type = var_632_pad_type_0, strides = var_632_strides_0, weight = model_model_layers_16_self_attn_q_proj_weight_palettized, x = var_625)[name = string("op_632")];
tensor<int32, [4]> var_633 = const()[name = string("op_633"), val = tensor<int32, [4]>([1, 24, 1, 128])];
tensor<fp16, [1, 24, 1, 128]> var_634 = reshape(shape = var_633, x = var_632)[name = string("op_634")];
string var_641_pad_type_0 = const()[name = string("op_641_pad_type_0"), val = string("valid")];
tensor<int32, [2]> var_641_strides_0 = const()[name = string("op_641_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> var_641_pad_0 = const()[name = string("op_641_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> var_641_dilations_0 = const()[name = string("op_641_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 var_641_groups_0 = const()[name = string("op_641_groups_0"), val = int32(1)];
tensor<fp16, [1, 1024, 1, 1]> var_641 = conv(dilations = var_641_dilations_0, groups = var_641_groups_0, pad = var_641_pad_0, pad_type = var_641_pad_type_0, strides = var_641_strides_0, weight = model_model_layers_16_self_attn_k_proj_weight_palettized, x = var_625)[name = string("op_641")];
tensor<int32, [4]> var_642 = const()[name = string("op_642"), val = tensor<int32, [4]>([1, 8, 1, 128])];
tensor<fp16, [1, 8, 1, 128]> var_643 = reshape(shape = var_642, x = var_641)[name = string("op_643")];
string var_650_pad_type_0 = const()[name = string("op_650_pad_type_0"), val = string("valid")];
tensor<int32, [2]> var_650_strides_0 = const()[name = string("op_650_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> var_650_pad_0 = const()[name = string("op_650_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> var_650_dilations_0 = const()[name = string("op_650_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 var_650_groups_0 = const()[name = string("op_650_groups_0"), val = int32(1)];
tensor<fp16, [1, 1024, 1, 1]> var_650 = conv(dilations = var_650_dilations_0, groups = var_650_groups_0, pad = var_650_pad_0, pad_type = var_650_pad_type_0, strides = var_650_strides_0, weight = model_model_layers_16_self_attn_v_proj_weight_palettized, x = var_625)[name = string("op_650")];
tensor<int32, [4]> var_651 = const()[name = string("op_651"), val = tensor<int32, [4]>([1, 8, 1, 128])];
tensor<fp16, [1, 8, 1, 128]> var_652 = reshape(shape = var_651, x = var_650)[name = string("op_652")];
tensor<int32, [4]> x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor<int32, [4]>([1, 24, 1, 64])];
tensor<bool, [4]> x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 24, 1, 64]> x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = var_634)[name = string("x1_9")];
tensor<int32, [4]> x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor<int32, [4]>([1, 24, 1, 128])];
tensor<bool, [4]> x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 24, 1, 64]> x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = var_634)[name = string("x2_9")];
tensor<fp16, [1, 24, 1, 64]> var_666_cast_fp16 = mul(x = x1_9, y = cos_3_cast_fp16)[name = string("op_666_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_667_cast_fp16 = mul(x = x2_9, y = sin_3_cast_fp16)[name = string("op_667_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_668_cast_fp16 = sub(x = var_666_cast_fp16, y = var_667_cast_fp16)[name = string("op_668_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_669_cast_fp16 = mul(x = x2_9, y = cos_3_cast_fp16)[name = string("op_669_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_670_cast_fp16 = mul(x = x1_9, y = sin_3_cast_fp16)[name = string("op_670_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_671_cast_fp16 = add(x = var_669_cast_fp16, y = var_670_cast_fp16)[name = string("op_671_cast_fp16")];
bool rotated_9_interleave_0 = const()[name = string("rotated_9_interleave_0"), val = bool(false)];
tensor<fp16, [1, 24, 1, 128]> rotated_9_cast_fp16 = concat(axis = var_52, interleave = rotated_9_interleave_0, values = (var_668_cast_fp16, var_671_cast_fp16))[name = string("rotated_9_cast_fp16")];
tensor<int32, [4]> x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor<int32, [4]>([1, 8, 1, 64])];
tensor<bool, [4]> x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 8, 1, 64]> x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = var_643)[name = string("x1_11")];
tensor<int32, [4]> x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor<int32, [4]>([1, 8, 1, 128])];
tensor<bool, [4]> x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 8, 1, 64]> x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = var_643)[name = string("x2_11")];
tensor<fp16, [1, 8, 1, 64]> var_687_cast_fp16 = mul(x = x1_11, y = cos_3_cast_fp16)[name = string("op_687_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_688_cast_fp16 = mul(x = x2_11, y = sin_3_cast_fp16)[name = string("op_688_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_689_cast_fp16 = sub(x = var_687_cast_fp16, y = var_688_cast_fp16)[name = string("op_689_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_690_cast_fp16 = mul(x = x2_11, y = cos_3_cast_fp16)[name = string("op_690_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_691_cast_fp16 = mul(x = x1_11, y = sin_3_cast_fp16)[name = string("op_691_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_692_cast_fp16 = add(x = var_690_cast_fp16, y = var_691_cast_fp16)[name = string("op_692_cast_fp16")];
bool rotated_11_interleave_0 = const()[name = string("rotated_11_interleave_0"), val = bool(false)];
tensor<fp16, [1, 8, 1, 128]> rotated_11_cast_fp16 = concat(axis = var_52, interleave = rotated_11_interleave_0, values = (var_689_cast_fp16, var_692_cast_fp16))[name = string("rotated_11_cast_fp16")];
tensor<int32, [1]> expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor<int32, [1]>([16])];
tensor<int32, [1]> expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor<int32, [1]>([17])];
int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)];
bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_18")];
tensor<int32, [1]> concat_19_values1_0 = const()[name = string("concat_19_values1_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_19_values3_0 = const()[name = string("concat_19_values3_0"), val = tensor<int32, [1]>([0])];
int32 concat_19_axis_0 = const()[name = string("concat_19_axis_0"), val = int32(0)];
bool concat_19_interleave_0 = const()[name = string("concat_19_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_19 = concat(axis = concat_19_axis_0, interleave = concat_19_interleave_0, values = (expand_dims_28, concat_19_values1_0, var_327, concat_19_values3_0))[name = string("concat_19")];
tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_18, begin_mask = model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_19, end_mask = model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_5_stride_0, update = rotated_11_cast_fp16, x = coreml_update_state_17)[name = string("model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16")];
write_state(data = model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_4_write_state")];
tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_18 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_4")];
tensor<int32, [1]> expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor<int32, [1]>([44])];
tensor<int32, [1]> expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor<int32, [1]>([45])];
int32 concat_22_axis_0 = const()[name = string("concat_22_axis_0"), val = int32(0)];
bool concat_22_interleave_0 = const()[name = string("concat_22_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_22 = concat(axis = concat_22_axis_0, interleave = concat_22_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_22")];
tensor<int32, [1]> concat_23_values1_0 = const()[name = string("concat_23_values1_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_23_values3_0 = const()[name = string("concat_23_values3_0"), val = tensor<int32, [1]>([0])];
int32 concat_23_axis_0 = const()[name = string("concat_23_axis_0"), val = int32(0)];
bool concat_23_interleave_0 = const()[name = string("concat_23_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_23 = concat(axis = concat_23_axis_0, interleave = concat_23_interleave_0, values = (expand_dims_34, concat_23_values1_0, var_327, concat_23_values3_0))[name = string("concat_23")];
tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_22, begin_mask = model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_23, end_mask = model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_6_stride_0, update = var_652, x = coreml_update_state_18)[name = string("model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16")];
write_state(data = model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_5_write_state")];
tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_19 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_5")];
tensor<int32, [4]> var_712_begin_0 = const()[name = string("op_712_begin_0"), val = tensor<int32, [4]>([16, 0, 0, 0])];
tensor<int32, [4]> var_712_end_0 = const()[name = string("op_712_end_0"), val = tensor<int32, [4]>([17, 8, 1024, 128])];
tensor<bool, [4]> var_712_end_mask_0 = const()[name = string("op_712_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<fp16, [1, 8, 1024, 128]> var_712_cast_fp16 = slice_by_index(begin = var_712_begin_0, end = var_712_end_0, end_mask = var_712_end_mask_0, x = coreml_update_state_19)[name = string("op_712_cast_fp16")];
tensor<int32, [1]> K_layer_cache_5_axes_0 = const()[name = string("K_layer_cache_5_axes_0"), val = tensor<int32, [1]>([0])];
tensor<fp16, [8, 1024, 128]> K_layer_cache_5_cast_fp16 = squeeze(axes = K_layer_cache_5_axes_0, x = var_712_cast_fp16)[name = string("K_layer_cache_5_cast_fp16")];
tensor<int32, [4]> var_714_begin_0 = const()[name = string("op_714_begin_0"), val = tensor<int32, [4]>([44, 0, 0, 0])];
tensor<int32, [4]> var_714_end_0 = const()[name = string("op_714_end_0"), val = tensor<int32, [4]>([45, 8, 1024, 128])];
tensor<bool, [4]> var_714_end_mask_0 = const()[name = string("op_714_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<fp16, [1, 8, 1024, 128]> var_714_cast_fp16 = slice_by_index(begin = var_714_begin_0, end = var_714_end_0, end_mask = var_714_end_mask_0, x = coreml_update_state_19)[name = string("op_714_cast_fp16")];
tensor<int32, [1]> V_layer_cache_5_axes_0 = const()[name = string("V_layer_cache_5_axes_0"), val = tensor<int32, [1]>([0])];
tensor<fp16, [8, 1024, 128]> V_layer_cache_5_cast_fp16 = squeeze(axes = V_layer_cache_5_axes_0, x = var_714_cast_fp16)[name = string("V_layer_cache_5_cast_fp16")];
tensor<int32, [1]> x_67_axes_0 = const()[name = string("x_67_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [8, 1, 1024, 128]> x_67_cast_fp16 = expand_dims(axes = x_67_axes_0, x = K_layer_cache_5_cast_fp16)[name = string("x_67_cast_fp16")];
tensor<int32, [4]> var_723 = const()[name = string("op_723"), val = tensor<int32, [4]>([1, 3, 1, 1])];
tensor<fp16, [8, 3, 1024, 128]> x_69_cast_fp16 = tile(reps = var_723, x = x_67_cast_fp16)[name = string("x_69_cast_fp16")];
tensor<int32, [4]> var_727 = const()[name = string("op_727"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
tensor<fp16, [1, 24, 1024, 128]> key_states_11_cast_fp16 = reshape(shape = var_727, x = x_69_cast_fp16)[name = string("key_states_11_cast_fp16")];
tensor<int32, [1]> x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [8, 1, 1024, 128]> x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = V_layer_cache_5_cast_fp16)[name = string("x_73_cast_fp16")];
tensor<int32, [4]> var_730 = const()[name = string("op_730"), val = tensor<int32, [4]>([1, 3, 1, 1])];
tensor<fp16, [8, 3, 1024, 128]> x_75_cast_fp16 = tile(reps = var_730, x = x_73_cast_fp16)[name = string("x_75_cast_fp16")];
tensor<int32, [4]> var_734 = const()[name = string("op_734"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
tensor<fp16, [1, 24, 1024, 128]> value_states_11_cast_fp16 = reshape(shape = var_734, x = x_75_cast_fp16)[name = string("value_states_11_cast_fp16")];
bool var_737_transpose_x_1 = const()[name = string("op_737_transpose_x_1"), val = bool(false)];
bool var_737_transpose_y_1 = const()[name = string("op_737_transpose_y_1"), val = bool(true)];
tensor<fp16, [1, 24, 1, 1024]> var_737_cast_fp16 = matmul(transpose_x = var_737_transpose_x_1, transpose_y = var_737_transpose_y_1, x = rotated_9_cast_fp16, y = key_states_11_cast_fp16)[name = string("op_737_cast_fp16")];
fp16 var_738_to_fp16 = const()[name = string("op_738_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 24, 1, 1024]> attn_weights_9_cast_fp16 = mul(x = var_737_cast_fp16, y = var_738_to_fp16)[name = string("attn_weights_9_cast_fp16")];
tensor<fp16, [1, 24, 1, 1024]> x_77_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask)[name = string("x_77_cast_fp16")];
tensor<int32, [1]> reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor<int32, [1]>([-1])];
bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 24, 1, 1]> reduce_max_2_cast_fp16 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_77_cast_fp16)[name = string("reduce_max_2_cast_fp16")];
tensor<fp16, [1, 24, 1, 1024]> x_79_cast_fp16 = sub(x = x_77_cast_fp16, y = reduce_max_2_cast_fp16)[name = string("x_79_cast_fp16")];
tensor<fp16, [1, 24, 1, 1024]> exp_x_5_cast_fp16 = exp(x = x_79_cast_fp16)[name = string("exp_x_5_cast_fp16")];
tensor<int32, [1]> var_749_axes_0 = const()[name = string("op_749_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_749_keep_dims_0 = const()[name = string("op_749_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 24, 1, 1]> var_749_cast_fp16 = reduce_sum(axes = var_749_axes_0, keep_dims = var_749_keep_dims_0, x = exp_x_5_cast_fp16)[name = string("op_749_cast_fp16")];
tensor<fp16, [1, 24, 1, 1024]> attn_weights_11_cast_fp16 = real_div(x = exp_x_5_cast_fp16, y = var_749_cast_fp16)[name = string("attn_weights_11_cast_fp16")];
bool attn_output_13_transpose_x_0 = const()[name = string("attn_output_13_transpose_x_0"), val = bool(false)];
bool attn_output_13_transpose_y_0 = const()[name = string("attn_output_13_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 24, 1, 128]> attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_0, transpose_y = attn_output_13_transpose_y_0, x = attn_weights_11_cast_fp16, y = value_states_11_cast_fp16)[name = string("attn_output_13_cast_fp16")];
tensor<int32, [4]> var_752_perm_0 = const()[name = string("op_752_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_754 = const()[name = string("op_754"), val = tensor<int32, [3]>([1, 1, 3072])];
tensor<fp16, [1, 1, 24, 128]> var_752_cast_fp16 = transpose(perm = var_752_perm_0, x = attn_output_13_cast_fp16)[name = string("transpose_18")];
tensor<fp16, [1, 1, 3072]> input_33_cast_fp16 = reshape(shape = var_754, x = var_752_cast_fp16)[name = string("input_33_cast_fp16")];
tensor<fp16, [3072, 3072]> model_model_layers_16_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396593216))), lut = tensor<fp16, [384, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(401311872))))[name = string("model_model_layers_16_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
tensor<fp16, [1, 1, 3072]> linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_16_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_33_cast_fp16)[name = string("linear_2_cast_fp16")];
tensor<fp16, [1, 1, 3072]> hidden_states_21_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = linear_2_cast_fp16)[name = string("hidden_states_21_cast_fp16")];
tensor<int32, [1]> mean_11_axes_0 = const()[name = string("mean_11_axes_0"), val = tensor<int32, [1]>([-1])];
bool mean_11_keep_dims_0 = const()[name = string("mean_11_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 1, 1]> mean_11_cast_fp16 = reduce_mean(axes = mean_11_axes_0, keep_dims = mean_11_keep_dims_0, x = hidden_states_21_cast_fp16)[name = string("mean_11_cast_fp16")];
tensor<fp16, [1, 1, 3072]> input_35_cast_fp16 = sub(x = hidden_states_21_cast_fp16, y = mean_11_cast_fp16)[name = string("input_35_cast_fp16")];
tensor<int32, [1]> var_765_axes_0 = const()[name = string("op_765_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [3072]> model_model_layers_16_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_16_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(401324224)))];
tensor<fp16, [1, 1, 3072]> var_765_cast_fp16 = layer_norm(axes = var_765_axes_0, epsilon = var_47_to_fp16, gamma = model_model_layers_16_post_attention_layernorm_weight_to_fp16, x = input_35_cast_fp16)[name = string("op_765_cast_fp16")];
tensor<int32, [3]> var_772 = const()[name = string("op_772"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [1]> input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 1]> var_773 = transpose(perm = var_772, x = var_765_cast_fp16)[name = string("transpose_17")];
tensor<fp16, [1, 3072, 1, 1]> input_37 = expand_dims(axes = input_37_axes_0, x = var_773)[name = string("input_37")];
string input_39_pad_type_0 = const()[name = string("input_39_pad_type_0"), val = string("valid")];
tensor<int32, [2]> input_39_strides_0 = const()[name = string("input_39_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> input_39_pad_0 = const()[name = string("input_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> input_39_dilations_0 = const()[name = string("input_39_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 input_39_groups_0 = const()[name = string("input_39_groups_0"), val = int32(1)];
tensor<fp16, [1, 8192, 1, 1]> input_39 = conv(dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = model_model_layers_16_mlp_gate_proj_weight_palettized, x = input_37)[name = string("input_39")];
string up_states_5_pad_type_0 = const()[name = string("up_states_5_pad_type_0"), val = string("valid")];
tensor<int32, [2]> up_states_5_strides_0 = const()[name = string("up_states_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> up_states_5_pad_0 = const()[name = string("up_states_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> up_states_5_dilations_0 = const()[name = string("up_states_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 up_states_5_groups_0 = const()[name = string("up_states_5_groups_0"), val = int32(1)];
tensor<fp16, [1, 8192, 1, 1]> up_states_5 = conv(dilations = up_states_5_dilations_0, groups = up_states_5_groups_0, pad = up_states_5_pad_0, pad_type = up_states_5_pad_type_0, strides = up_states_5_strides_0, weight = model_model_layers_16_mlp_up_proj_weight_palettized, x = input_37)[name = string("up_states_5")];
tensor<fp16, [1, 8192, 1, 1]> gate_states_5 = silu(x = input_39)[name = string("gate_states_5")];
tensor<fp16, [1, 8192, 1, 1]> input_41 = mul(x = gate_states_5, y = up_states_5)[name = string("input_41")];
string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")];
tensor<int32, [2]> hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)];
tensor<fp16, [1, 3072, 1, 1]> hidden_states_23 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = model_model_layers_16_mlp_down_proj_weight_palettized, x = input_41)[name = string("hidden_states_23")];
tensor<int32, [1]> var_795_axes_0 = const()[name = string("op_795_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 1]> var_795 = squeeze(axes = var_795_axes_0, x = hidden_states_23)[name = string("op_795")];
tensor<int32, [3]> var_796 = const()[name = string("op_796"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 1, 3072]> var_797 = transpose(perm = var_796, x = var_795)[name = string("transpose_16")];
tensor<fp16, [1, 1, 3072]> hidden_states_25_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = var_797)[name = string("hidden_states_25_cast_fp16")];
tensor<int32, [1]> mean_13_axes_0 = const()[name = string("mean_13_axes_0"), val = tensor<int32, [1]>([-1])];
bool mean_13_keep_dims_0 = const()[name = string("mean_13_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 1, 1]> mean_13_cast_fp16 = reduce_mean(axes = mean_13_axes_0, keep_dims = mean_13_keep_dims_0, x = hidden_states_25_cast_fp16)[name = string("mean_13_cast_fp16")];
tensor<fp16, [1, 1, 3072]> input_43_cast_fp16 = sub(x = hidden_states_25_cast_fp16, y = mean_13_cast_fp16)[name = string("input_43_cast_fp16")];
tensor<int32, [1]> var_805_axes_0 = const()[name = string("op_805_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [3072]> model_model_layers_17_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_17_input_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(401330432)))];
tensor<fp16, [1, 1, 3072]> var_805_cast_fp16 = layer_norm(axes = var_805_axes_0, epsilon = var_47_to_fp16, gamma = model_model_layers_17_input_layernorm_weight_to_fp16, x = input_43_cast_fp16)[name = string("op_805_cast_fp16")];
tensor<int32, [3]> var_808 = const()[name = string("op_808"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [1]> var_810_axes_0 = const()[name = string("op_810_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 1]> var_809 = transpose(perm = var_808, x = var_805_cast_fp16)[name = string("transpose_15")];
tensor<fp16, [1, 3072, 1, 1]> var_810 = expand_dims(axes = var_810_axes_0, x = var_809)[name = string("op_810")];
string var_817_pad_type_0 = const()[name = string("op_817_pad_type_0"), val = string("valid")];
tensor<int32, [2]> var_817_strides_0 = const()[name = string("op_817_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> var_817_pad_0 = const()[name = string("op_817_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> var_817_dilations_0 = const()[name = string("op_817_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 var_817_groups_0 = const()[name = string("op_817_groups_0"), val = int32(1)];
tensor<fp16, [1, 3072, 1, 1]> var_817 = conv(dilations = var_817_dilations_0, groups = var_817_groups_0, pad = var_817_pad_0, pad_type = var_817_pad_type_0, strides = var_817_strides_0, weight = model_model_layers_17_self_attn_q_proj_weight_palettized, x = var_810)[name = string("op_817")];
tensor<int32, [4]> var_818 = const()[name = string("op_818"), val = tensor<int32, [4]>([1, 24, 1, 128])];
tensor<fp16, [1, 24, 1, 128]> var_819 = reshape(shape = var_818, x = var_817)[name = string("op_819")];
string var_826_pad_type_0 = const()[name = string("op_826_pad_type_0"), val = string("valid")];
tensor<int32, [2]> var_826_strides_0 = const()[name = string("op_826_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> var_826_pad_0 = const()[name = string("op_826_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> var_826_dilations_0 = const()[name = string("op_826_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 var_826_groups_0 = const()[name = string("op_826_groups_0"), val = int32(1)];
tensor<fp16, [1, 1024, 1, 1]> var_826 = conv(dilations = var_826_dilations_0, groups = var_826_groups_0, pad = var_826_pad_0, pad_type = var_826_pad_type_0, strides = var_826_strides_0, weight = model_model_layers_17_self_attn_k_proj_weight_palettized, x = var_810)[name = string("op_826")];
tensor<int32, [4]> var_827 = const()[name = string("op_827"), val = tensor<int32, [4]>([1, 8, 1, 128])];
tensor<fp16, [1, 8, 1, 128]> var_828 = reshape(shape = var_827, x = var_826)[name = string("op_828")];
string var_835_pad_type_0 = const()[name = string("op_835_pad_type_0"), val = string("valid")];
tensor<int32, [2]> var_835_strides_0 = const()[name = string("op_835_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> var_835_pad_0 = const()[name = string("op_835_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> var_835_dilations_0 = const()[name = string("op_835_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 var_835_groups_0 = const()[name = string("op_835_groups_0"), val = int32(1)];
tensor<fp16, [1, 1024, 1, 1]> var_835 = conv(dilations = var_835_dilations_0, groups = var_835_groups_0, pad = var_835_pad_0, pad_type = var_835_pad_type_0, strides = var_835_strides_0, weight = model_model_layers_17_self_attn_v_proj_weight_palettized, x = var_810)[name = string("op_835")];
tensor<int32, [4]> var_836 = const()[name = string("op_836"), val = tensor<int32, [4]>([1, 8, 1, 128])];
tensor<fp16, [1, 8, 1, 128]> var_837 = reshape(shape = var_836, x = var_835)[name = string("op_837")];
tensor<int32, [4]> x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor<int32, [4]>([1, 24, 1, 64])];
tensor<bool, [4]> x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 24, 1, 64]> x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = var_819)[name = string("x1_13")];
tensor<int32, [4]> x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor<int32, [4]>([1, 24, 1, 128])];
tensor<bool, [4]> x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 24, 1, 64]> x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = var_819)[name = string("x2_13")];
tensor<fp16, [1, 24, 1, 64]> var_851_cast_fp16 = mul(x = x1_13, y = cos_3_cast_fp16)[name = string("op_851_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_852_cast_fp16 = mul(x = x2_13, y = sin_3_cast_fp16)[name = string("op_852_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_853_cast_fp16 = sub(x = var_851_cast_fp16, y = var_852_cast_fp16)[name = string("op_853_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_854_cast_fp16 = mul(x = x2_13, y = cos_3_cast_fp16)[name = string("op_854_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_855_cast_fp16 = mul(x = x1_13, y = sin_3_cast_fp16)[name = string("op_855_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_856_cast_fp16 = add(x = var_854_cast_fp16, y = var_855_cast_fp16)[name = string("op_856_cast_fp16")];
bool rotated_13_interleave_0 = const()[name = string("rotated_13_interleave_0"), val = bool(false)];
tensor<fp16, [1, 24, 1, 128]> rotated_13_cast_fp16 = concat(axis = var_52, interleave = rotated_13_interleave_0, values = (var_853_cast_fp16, var_856_cast_fp16))[name = string("rotated_13_cast_fp16")];
tensor<int32, [4]> x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor<int32, [4]>([1, 8, 1, 64])];
tensor<bool, [4]> x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 8, 1, 64]> x1_15 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = var_828)[name = string("x1_15")];
tensor<int32, [4]> x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor<int32, [4]>([1, 8, 1, 128])];
tensor<bool, [4]> x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 8, 1, 64]> x2_15 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = var_828)[name = string("x2_15")];
tensor<fp16, [1, 8, 1, 64]> var_872_cast_fp16 = mul(x = x1_15, y = cos_3_cast_fp16)[name = string("op_872_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_873_cast_fp16 = mul(x = x2_15, y = sin_3_cast_fp16)[name = string("op_873_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_874_cast_fp16 = sub(x = var_872_cast_fp16, y = var_873_cast_fp16)[name = string("op_874_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_875_cast_fp16 = mul(x = x2_15, y = cos_3_cast_fp16)[name = string("op_875_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_876_cast_fp16 = mul(x = x1_15, y = sin_3_cast_fp16)[name = string("op_876_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_877_cast_fp16 = add(x = var_875_cast_fp16, y = var_876_cast_fp16)[name = string("op_877_cast_fp16")];
bool rotated_15_interleave_0 = const()[name = string("rotated_15_interleave_0"), val = bool(false)];
tensor<fp16, [1, 8, 1, 128]> rotated_15_cast_fp16 = concat(axis = var_52, interleave = rotated_15_interleave_0, values = (var_874_cast_fp16, var_877_cast_fp16))[name = string("rotated_15_cast_fp16")];
tensor<int32, [1]> expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor<int32, [1]>([17])];
tensor<int32, [1]> expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor<int32, [1]>([18])];
int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)];
bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_26")];
tensor<int32, [1]> concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor<int32, [1]>([0])];
int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)];
bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (expand_dims_40, concat_27_values1_0, var_327, concat_27_values3_0))[name = string("concat_27")];
tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_26, begin_mask = model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_27, end_mask = model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_7_stride_0, update = rotated_15_cast_fp16, x = coreml_update_state_19)[name = string("model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16")];
write_state(data = model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_6_write_state")];
tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_20 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_6")];
tensor<int32, [1]> expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor<int32, [1]>([45])];
tensor<int32, [1]> expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor<int32, [1]>([46])];
int32 concat_30_axis_0 = const()[name = string("concat_30_axis_0"), val = int32(0)];
bool concat_30_interleave_0 = const()[name = string("concat_30_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_30 = concat(axis = concat_30_axis_0, interleave = concat_30_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_30")];
tensor<int32, [1]> concat_31_values1_0 = const()[name = string("concat_31_values1_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_31_values3_0 = const()[name = string("concat_31_values3_0"), val = tensor<int32, [1]>([0])];
int32 concat_31_axis_0 = const()[name = string("concat_31_axis_0"), val = int32(0)];
bool concat_31_interleave_0 = const()[name = string("concat_31_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_31 = concat(axis = concat_31_axis_0, interleave = concat_31_interleave_0, values = (expand_dims_46, concat_31_values1_0, var_327, concat_31_values3_0))[name = string("concat_31")];
tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_30, begin_mask = model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_31, end_mask = model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_8_stride_0, update = var_837, x = coreml_update_state_20)[name = string("model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16")];
write_state(data = model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_7_write_state")];
tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_21 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_7")];
tensor<int32, [4]> var_897_begin_0 = const()[name = string("op_897_begin_0"), val = tensor<int32, [4]>([17, 0, 0, 0])];
tensor<int32, [4]> var_897_end_0 = const()[name = string("op_897_end_0"), val = tensor<int32, [4]>([18, 8, 1024, 128])];
tensor<bool, [4]> var_897_end_mask_0 = const()[name = string("op_897_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<fp16, [1, 8, 1024, 128]> var_897_cast_fp16 = slice_by_index(begin = var_897_begin_0, end = var_897_end_0, end_mask = var_897_end_mask_0, x = coreml_update_state_21)[name = string("op_897_cast_fp16")];
tensor<int32, [1]> K_layer_cache_7_axes_0 = const()[name = string("K_layer_cache_7_axes_0"), val = tensor<int32, [1]>([0])];
tensor<fp16, [8, 1024, 128]> K_layer_cache_7_cast_fp16 = squeeze(axes = K_layer_cache_7_axes_0, x = var_897_cast_fp16)[name = string("K_layer_cache_7_cast_fp16")];
tensor<int32, [4]> var_899_begin_0 = const()[name = string("op_899_begin_0"), val = tensor<int32, [4]>([45, 0, 0, 0])];
tensor<int32, [4]> var_899_end_0 = const()[name = string("op_899_end_0"), val = tensor<int32, [4]>([46, 8, 1024, 128])];
tensor<bool, [4]> var_899_end_mask_0 = const()[name = string("op_899_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<fp16, [1, 8, 1024, 128]> var_899_cast_fp16 = slice_by_index(begin = var_899_begin_0, end = var_899_end_0, end_mask = var_899_end_mask_0, x = coreml_update_state_21)[name = string("op_899_cast_fp16")];
tensor<int32, [1]> V_layer_cache_7_axes_0 = const()[name = string("V_layer_cache_7_axes_0"), val = tensor<int32, [1]>([0])];
tensor<fp16, [8, 1024, 128]> V_layer_cache_7_cast_fp16 = squeeze(axes = V_layer_cache_7_axes_0, x = var_899_cast_fp16)[name = string("V_layer_cache_7_cast_fp16")];
tensor<int32, [1]> x_95_axes_0 = const()[name = string("x_95_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [8, 1, 1024, 128]> x_95_cast_fp16 = expand_dims(axes = x_95_axes_0, x = K_layer_cache_7_cast_fp16)[name = string("x_95_cast_fp16")];
tensor<int32, [4]> var_908 = const()[name = string("op_908"), val = tensor<int32, [4]>([1, 3, 1, 1])];
tensor<fp16, [8, 3, 1024, 128]> x_97_cast_fp16 = tile(reps = var_908, x = x_95_cast_fp16)[name = string("x_97_cast_fp16")];
tensor<int32, [4]> var_912 = const()[name = string("op_912"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
tensor<fp16, [1, 24, 1024, 128]> key_states_15_cast_fp16 = reshape(shape = var_912, x = x_97_cast_fp16)[name = string("key_states_15_cast_fp16")];
tensor<int32, [1]> x_101_axes_0 = const()[name = string("x_101_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [8, 1, 1024, 128]> x_101_cast_fp16 = expand_dims(axes = x_101_axes_0, x = V_layer_cache_7_cast_fp16)[name = string("x_101_cast_fp16")];
tensor<int32, [4]> var_915 = const()[name = string("op_915"), val = tensor<int32, [4]>([1, 3, 1, 1])];
tensor<fp16, [8, 3, 1024, 128]> x_103_cast_fp16 = tile(reps = var_915, x = x_101_cast_fp16)[name = string("x_103_cast_fp16")];
tensor<int32, [4]> var_919 = const()[name = string("op_919"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
tensor<fp16, [1, 24, 1024, 128]> value_states_15_cast_fp16 = reshape(shape = var_919, x = x_103_cast_fp16)[name = string("value_states_15_cast_fp16")];
bool var_922_transpose_x_1 = const()[name = string("op_922_transpose_x_1"), val = bool(false)];
bool var_922_transpose_y_1 = const()[name = string("op_922_transpose_y_1"), val = bool(true)];
tensor<fp16, [1, 24, 1, 1024]> var_922_cast_fp16 = matmul(transpose_x = var_922_transpose_x_1, transpose_y = var_922_transpose_y_1, x = rotated_13_cast_fp16, y = key_states_15_cast_fp16)[name = string("op_922_cast_fp16")];
fp16 var_923_to_fp16 = const()[name = string("op_923_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 24, 1, 1024]> attn_weights_13_cast_fp16 = mul(x = var_922_cast_fp16, y = var_923_to_fp16)[name = string("attn_weights_13_cast_fp16")];
tensor<fp16, [1, 24, 1, 1024]> x_105_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask)[name = string("x_105_cast_fp16")];
tensor<int32, [1]> reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor<int32, [1]>([-1])];
bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 24, 1, 1]> reduce_max_3_cast_fp16 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_105_cast_fp16)[name = string("reduce_max_3_cast_fp16")];
tensor<fp16, [1, 24, 1, 1024]> x_107_cast_fp16 = sub(x = x_105_cast_fp16, y = reduce_max_3_cast_fp16)[name = string("x_107_cast_fp16")];
tensor<fp16, [1, 24, 1, 1024]> exp_x_7_cast_fp16 = exp(x = x_107_cast_fp16)[name = string("exp_x_7_cast_fp16")];
tensor<int32, [1]> var_934_axes_0 = const()[name = string("op_934_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_934_keep_dims_0 = const()[name = string("op_934_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 24, 1, 1]> var_934_cast_fp16 = reduce_sum(axes = var_934_axes_0, keep_dims = var_934_keep_dims_0, x = exp_x_7_cast_fp16)[name = string("op_934_cast_fp16")];
tensor<fp16, [1, 24, 1, 1024]> attn_weights_15_cast_fp16 = real_div(x = exp_x_7_cast_fp16, y = var_934_cast_fp16)[name = string("attn_weights_15_cast_fp16")];
bool attn_output_19_transpose_x_0 = const()[name = string("attn_output_19_transpose_x_0"), val = bool(false)];
bool attn_output_19_transpose_y_0 = const()[name = string("attn_output_19_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 24, 1, 128]> attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_0, transpose_y = attn_output_19_transpose_y_0, x = attn_weights_15_cast_fp16, y = value_states_15_cast_fp16)[name = string("attn_output_19_cast_fp16")];
tensor<int32, [4]> var_937_perm_0 = const()[name = string("op_937_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_939 = const()[name = string("op_939"), val = tensor<int32, [3]>([1, 1, 3072])];
tensor<fp16, [1, 1, 24, 128]> var_937_cast_fp16 = transpose(perm = var_937_perm_0, x = attn_output_19_cast_fp16)[name = string("transpose_14")];
tensor<fp16, [1, 1, 3072]> input_47_cast_fp16 = reshape(shape = var_939, x = var_937_cast_fp16)[name = string("input_47_cast_fp16")];
tensor<fp16, [3072, 3072]> model_model_layers_17_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(401336640))), lut = tensor<fp16, [384, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406055296))))[name = string("model_model_layers_17_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
tensor<fp16, [1, 1, 3072]> linear_3_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_17_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_47_cast_fp16)[name = string("linear_3_cast_fp16")];
tensor<fp16, [1, 1, 3072]> hidden_states_29_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = linear_3_cast_fp16)[name = string("hidden_states_29_cast_fp16")];
tensor<int32, [1]> mean_15_axes_0 = const()[name = string("mean_15_axes_0"), val = tensor<int32, [1]>([-1])];
bool mean_15_keep_dims_0 = const()[name = string("mean_15_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 1, 1]> mean_15_cast_fp16 = reduce_mean(axes = mean_15_axes_0, keep_dims = mean_15_keep_dims_0, x = hidden_states_29_cast_fp16)[name = string("mean_15_cast_fp16")];
tensor<fp16, [1, 1, 3072]> input_49_cast_fp16 = sub(x = hidden_states_29_cast_fp16, y = mean_15_cast_fp16)[name = string("input_49_cast_fp16")];
tensor<int32, [1]> var_950_axes_0 = const()[name = string("op_950_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [3072]> model_model_layers_17_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_17_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406067648)))];
tensor<fp16, [1, 1, 3072]> var_950_cast_fp16 = layer_norm(axes = var_950_axes_0, epsilon = var_47_to_fp16, gamma = model_model_layers_17_post_attention_layernorm_weight_to_fp16, x = input_49_cast_fp16)[name = string("op_950_cast_fp16")];
tensor<int32, [3]> var_957 = const()[name = string("op_957"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [1]> input_51_axes_0 = const()[name = string("input_51_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 1]> var_958 = transpose(perm = var_957, x = var_950_cast_fp16)[name = string("transpose_13")];
tensor<fp16, [1, 3072, 1, 1]> input_51 = expand_dims(axes = input_51_axes_0, x = var_958)[name = string("input_51")];
string input_53_pad_type_0 = const()[name = string("input_53_pad_type_0"), val = string("valid")];
tensor<int32, [2]> input_53_strides_0 = const()[name = string("input_53_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> input_53_pad_0 = const()[name = string("input_53_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> input_53_dilations_0 = const()[name = string("input_53_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 input_53_groups_0 = const()[name = string("input_53_groups_0"), val = int32(1)];
tensor<fp16, [1, 8192, 1, 1]> input_53 = conv(dilations = input_53_dilations_0, groups = input_53_groups_0, pad = input_53_pad_0, pad_type = input_53_pad_type_0, strides = input_53_strides_0, weight = model_model_layers_17_mlp_gate_proj_weight_palettized, x = input_51)[name = string("input_53")];
string up_states_7_pad_type_0 = const()[name = string("up_states_7_pad_type_0"), val = string("valid")];
tensor<int32, [2]> up_states_7_strides_0 = const()[name = string("up_states_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> up_states_7_pad_0 = const()[name = string("up_states_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> up_states_7_dilations_0 = const()[name = string("up_states_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 up_states_7_groups_0 = const()[name = string("up_states_7_groups_0"), val = int32(1)];
tensor<fp16, [1, 8192, 1, 1]> up_states_7 = conv(dilations = up_states_7_dilations_0, groups = up_states_7_groups_0, pad = up_states_7_pad_0, pad_type = up_states_7_pad_type_0, strides = up_states_7_strides_0, weight = model_model_layers_17_mlp_up_proj_weight_palettized, x = input_51)[name = string("up_states_7")];
tensor<fp16, [1, 8192, 1, 1]> gate_states_7 = silu(x = input_53)[name = string("gate_states_7")];
tensor<fp16, [1, 8192, 1, 1]> input_55 = mul(x = gate_states_7, y = up_states_7)[name = string("input_55")];
string hidden_states_31_pad_type_0 = const()[name = string("hidden_states_31_pad_type_0"), val = string("valid")];
tensor<int32, [2]> hidden_states_31_strides_0 = const()[name = string("hidden_states_31_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> hidden_states_31_pad_0 = const()[name = string("hidden_states_31_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> hidden_states_31_dilations_0 = const()[name = string("hidden_states_31_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 hidden_states_31_groups_0 = const()[name = string("hidden_states_31_groups_0"), val = int32(1)];
tensor<fp16, [1, 3072, 1, 1]> hidden_states_31 = conv(dilations = hidden_states_31_dilations_0, groups = hidden_states_31_groups_0, pad = hidden_states_31_pad_0, pad_type = hidden_states_31_pad_type_0, strides = hidden_states_31_strides_0, weight = model_model_layers_17_mlp_down_proj_weight_palettized, x = input_55)[name = string("hidden_states_31")];
tensor<int32, [1]> var_980_axes_0 = const()[name = string("op_980_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 1]> var_980 = squeeze(axes = var_980_axes_0, x = hidden_states_31)[name = string("op_980")];
tensor<int32, [3]> var_981 = const()[name = string("op_981"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 1, 3072]> var_982 = transpose(perm = var_981, x = var_980)[name = string("transpose_12")];
tensor<fp16, [1, 1, 3072]> hidden_states_33_cast_fp16 = add(x = hidden_states_29_cast_fp16, y = var_982)[name = string("hidden_states_33_cast_fp16")];
tensor<int32, [1]> mean_17_axes_0 = const()[name = string("mean_17_axes_0"), val = tensor<int32, [1]>([-1])];
bool mean_17_keep_dims_0 = const()[name = string("mean_17_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 1, 1]> mean_17_cast_fp16 = reduce_mean(axes = mean_17_axes_0, keep_dims = mean_17_keep_dims_0, x = hidden_states_33_cast_fp16)[name = string("mean_17_cast_fp16")];
tensor<fp16, [1, 1, 3072]> input_57_cast_fp16 = sub(x = hidden_states_33_cast_fp16, y = mean_17_cast_fp16)[name = string("input_57_cast_fp16")];
tensor<int32, [1]> var_990_axes_0 = const()[name = string("op_990_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [3072]> model_model_layers_18_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_18_input_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406073856)))];
tensor<fp16, [1, 1, 3072]> var_990_cast_fp16 = layer_norm(axes = var_990_axes_0, epsilon = var_47_to_fp16, gamma = model_model_layers_18_input_layernorm_weight_to_fp16, x = input_57_cast_fp16)[name = string("op_990_cast_fp16")];
tensor<int32, [3]> var_993 = const()[name = string("op_993"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [1]> var_995_axes_0 = const()[name = string("op_995_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 1]> var_994 = transpose(perm = var_993, x = var_990_cast_fp16)[name = string("transpose_11")];
tensor<fp16, [1, 3072, 1, 1]> var_995 = expand_dims(axes = var_995_axes_0, x = var_994)[name = string("op_995")];
string var_1002_pad_type_0 = const()[name = string("op_1002_pad_type_0"), val = string("valid")];
tensor<int32, [2]> var_1002_strides_0 = const()[name = string("op_1002_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> var_1002_pad_0 = const()[name = string("op_1002_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> var_1002_dilations_0 = const()[name = string("op_1002_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 var_1002_groups_0 = const()[name = string("op_1002_groups_0"), val = int32(1)];
tensor<fp16, [1, 3072, 1, 1]> var_1002 = conv(dilations = var_1002_dilations_0, groups = var_1002_groups_0, pad = var_1002_pad_0, pad_type = var_1002_pad_type_0, strides = var_1002_strides_0, weight = model_model_layers_18_self_attn_q_proj_weight_palettized, x = var_995)[name = string("op_1002")];
tensor<int32, [4]> var_1003 = const()[name = string("op_1003"), val = tensor<int32, [4]>([1, 24, 1, 128])];
tensor<fp16, [1, 24, 1, 128]> var_1004 = reshape(shape = var_1003, x = var_1002)[name = string("op_1004")];
string var_1011_pad_type_0 = const()[name = string("op_1011_pad_type_0"), val = string("valid")];
tensor<int32, [2]> var_1011_strides_0 = const()[name = string("op_1011_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> var_1011_pad_0 = const()[name = string("op_1011_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> var_1011_dilations_0 = const()[name = string("op_1011_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 var_1011_groups_0 = const()[name = string("op_1011_groups_0"), val = int32(1)];
tensor<fp16, [1, 1024, 1, 1]> var_1011 = conv(dilations = var_1011_dilations_0, groups = var_1011_groups_0, pad = var_1011_pad_0, pad_type = var_1011_pad_type_0, strides = var_1011_strides_0, weight = model_model_layers_18_self_attn_k_proj_weight_palettized, x = var_995)[name = string("op_1011")];
tensor<int32, [4]> var_1012 = const()[name = string("op_1012"), val = tensor<int32, [4]>([1, 8, 1, 128])];
tensor<fp16, [1, 8, 1, 128]> var_1013 = reshape(shape = var_1012, x = var_1011)[name = string("op_1013")];
string var_1020_pad_type_0 = const()[name = string("op_1020_pad_type_0"), val = string("valid")];
tensor<int32, [2]> var_1020_strides_0 = const()[name = string("op_1020_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> var_1020_pad_0 = const()[name = string("op_1020_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> var_1020_dilations_0 = const()[name = string("op_1020_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 var_1020_groups_0 = const()[name = string("op_1020_groups_0"), val = int32(1)];
tensor<fp16, [1, 1024, 1, 1]> var_1020 = conv(dilations = var_1020_dilations_0, groups = var_1020_groups_0, pad = var_1020_pad_0, pad_type = var_1020_pad_type_0, strides = var_1020_strides_0, weight = model_model_layers_18_self_attn_v_proj_weight_palettized, x = var_995)[name = string("op_1020")];
tensor<int32, [4]> var_1021 = const()[name = string("op_1021"), val = tensor<int32, [4]>([1, 8, 1, 128])];
tensor<fp16, [1, 8, 1, 128]> var_1022 = reshape(shape = var_1021, x = var_1020)[name = string("op_1022")];
tensor<int32, [4]> x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor<int32, [4]>([1, 24, 1, 64])];
tensor<bool, [4]> x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 24, 1, 64]> x1_17 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = var_1004)[name = string("x1_17")];
tensor<int32, [4]> x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor<int32, [4]>([1, 24, 1, 128])];
tensor<bool, [4]> x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 24, 1, 64]> x2_17 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = var_1004)[name = string("x2_17")];
tensor<fp16, [1, 24, 1, 64]> var_1036_cast_fp16 = mul(x = x1_17, y = cos_3_cast_fp16)[name = string("op_1036_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_1037_cast_fp16 = mul(x = x2_17, y = sin_3_cast_fp16)[name = string("op_1037_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_1038_cast_fp16 = sub(x = var_1036_cast_fp16, y = var_1037_cast_fp16)[name = string("op_1038_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_1039_cast_fp16 = mul(x = x2_17, y = cos_3_cast_fp16)[name = string("op_1039_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_1040_cast_fp16 = mul(x = x1_17, y = sin_3_cast_fp16)[name = string("op_1040_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_1041_cast_fp16 = add(x = var_1039_cast_fp16, y = var_1040_cast_fp16)[name = string("op_1041_cast_fp16")];
bool rotated_17_interleave_0 = const()[name = string("rotated_17_interleave_0"), val = bool(false)];
tensor<fp16, [1, 24, 1, 128]> rotated_17_cast_fp16 = concat(axis = var_52, interleave = rotated_17_interleave_0, values = (var_1038_cast_fp16, var_1041_cast_fp16))[name = string("rotated_17_cast_fp16")];
tensor<int32, [4]> x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor<int32, [4]>([1, 8, 1, 64])];
tensor<bool, [4]> x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 8, 1, 64]> x1_19 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = var_1013)[name = string("x1_19")];
tensor<int32, [4]> x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor<int32, [4]>([1, 8, 1, 128])];
tensor<bool, [4]> x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 8, 1, 64]> x2_19 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = var_1013)[name = string("x2_19")];
tensor<fp16, [1, 8, 1, 64]> var_1057_cast_fp16 = mul(x = x1_19, y = cos_3_cast_fp16)[name = string("op_1057_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_1058_cast_fp16 = mul(x = x2_19, y = sin_3_cast_fp16)[name = string("op_1058_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_1059_cast_fp16 = sub(x = var_1057_cast_fp16, y = var_1058_cast_fp16)[name = string("op_1059_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_1060_cast_fp16 = mul(x = x2_19, y = cos_3_cast_fp16)[name = string("op_1060_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_1061_cast_fp16 = mul(x = x1_19, y = sin_3_cast_fp16)[name = string("op_1061_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_1062_cast_fp16 = add(x = var_1060_cast_fp16, y = var_1061_cast_fp16)[name = string("op_1062_cast_fp16")];
bool rotated_19_interleave_0 = const()[name = string("rotated_19_interleave_0"), val = bool(false)];
tensor<fp16, [1, 8, 1, 128]> rotated_19_cast_fp16 = concat(axis = var_52, interleave = rotated_19_interleave_0, values = (var_1059_cast_fp16, var_1062_cast_fp16))[name = string("rotated_19_cast_fp16")];
tensor<int32, [1]> expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor<int32, [1]>([18])];
tensor<int32, [1]> expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor<int32, [1]>([19])];
int32 concat_34_axis_0 = const()[name = string("concat_34_axis_0"), val = int32(0)];
bool concat_34_interleave_0 = const()[name = string("concat_34_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_34 = concat(axis = concat_34_axis_0, interleave = concat_34_interleave_0, values = (expand_dims_48, expand_dims_49, current_pos, expand_dims_51))[name = string("concat_34")];
tensor<int32, [1]> concat_35_values1_0 = const()[name = string("concat_35_values1_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_35_values3_0 = const()[name = string("concat_35_values3_0"), val = tensor<int32, [1]>([0])];
int32 concat_35_axis_0 = const()[name = string("concat_35_axis_0"), val = int32(0)];
bool concat_35_interleave_0 = const()[name = string("concat_35_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_35 = concat(axis = concat_35_axis_0, interleave = concat_35_interleave_0, values = (expand_dims_52, concat_35_values1_0, var_327, concat_35_values3_0))[name = string("concat_35")];
tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_34, begin_mask = model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0, end = concat_35, end_mask = model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_9_stride_0, update = rotated_19_cast_fp16, x = coreml_update_state_21)[name = string("model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16")];
write_state(data = model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_8_write_state")];
tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_22 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_8")];
tensor<int32, [1]> expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor<int32, [1]>([46])];
tensor<int32, [1]> expand_dims_55 = const()[name = string("expand_dims_55"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_58 = const()[name = string("expand_dims_58"), val = tensor<int32, [1]>([47])];
int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)];
bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_54, expand_dims_55, current_pos, expand_dims_57))[name = string("concat_38")];
tensor<int32, [1]> concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor<int32, [1]>([0])];
int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)];
bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_58, concat_39_values1_0, var_327, concat_39_values3_0))[name = string("concat_39")];
tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_10_stride_0, update = var_1022, x = coreml_update_state_22)[name = string("model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16")];
write_state(data = model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_9_write_state")];
tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_23 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_9")];
tensor<int32, [4]> var_1082_begin_0 = const()[name = string("op_1082_begin_0"), val = tensor<int32, [4]>([18, 0, 0, 0])];
tensor<int32, [4]> var_1082_end_0 = const()[name = string("op_1082_end_0"), val = tensor<int32, [4]>([19, 8, 1024, 128])];
tensor<bool, [4]> var_1082_end_mask_0 = const()[name = string("op_1082_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<fp16, [1, 8, 1024, 128]> var_1082_cast_fp16 = slice_by_index(begin = var_1082_begin_0, end = var_1082_end_0, end_mask = var_1082_end_mask_0, x = coreml_update_state_23)[name = string("op_1082_cast_fp16")];
tensor<int32, [1]> K_layer_cache_9_axes_0 = const()[name = string("K_layer_cache_9_axes_0"), val = tensor<int32, [1]>([0])];
tensor<fp16, [8, 1024, 128]> K_layer_cache_9_cast_fp16 = squeeze(axes = K_layer_cache_9_axes_0, x = var_1082_cast_fp16)[name = string("K_layer_cache_9_cast_fp16")];
tensor<int32, [4]> var_1084_begin_0 = const()[name = string("op_1084_begin_0"), val = tensor<int32, [4]>([46, 0, 0, 0])];
tensor<int32, [4]> var_1084_end_0 = const()[name = string("op_1084_end_0"), val = tensor<int32, [4]>([47, 8, 1024, 128])];
tensor<bool, [4]> var_1084_end_mask_0 = const()[name = string("op_1084_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<fp16, [1, 8, 1024, 128]> var_1084_cast_fp16 = slice_by_index(begin = var_1084_begin_0, end = var_1084_end_0, end_mask = var_1084_end_mask_0, x = coreml_update_state_23)[name = string("op_1084_cast_fp16")];
tensor<int32, [1]> V_layer_cache_9_axes_0 = const()[name = string("V_layer_cache_9_axes_0"), val = tensor<int32, [1]>([0])];
tensor<fp16, [8, 1024, 128]> V_layer_cache_9_cast_fp16 = squeeze(axes = V_layer_cache_9_axes_0, x = var_1084_cast_fp16)[name = string("V_layer_cache_9_cast_fp16")];
tensor<int32, [1]> x_123_axes_0 = const()[name = string("x_123_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [8, 1, 1024, 128]> x_123_cast_fp16 = expand_dims(axes = x_123_axes_0, x = K_layer_cache_9_cast_fp16)[name = string("x_123_cast_fp16")];
tensor<int32, [4]> var_1093 = const()[name = string("op_1093"), val = tensor<int32, [4]>([1, 3, 1, 1])];
tensor<fp16, [8, 3, 1024, 128]> x_125_cast_fp16 = tile(reps = var_1093, x = x_123_cast_fp16)[name = string("x_125_cast_fp16")];
tensor<int32, [4]> var_1097 = const()[name = string("op_1097"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
tensor<fp16, [1, 24, 1024, 128]> key_states_19_cast_fp16 = reshape(shape = var_1097, x = x_125_cast_fp16)[name = string("key_states_19_cast_fp16")];
tensor<int32, [1]> x_129_axes_0 = const()[name = string("x_129_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [8, 1, 1024, 128]> x_129_cast_fp16 = expand_dims(axes = x_129_axes_0, x = V_layer_cache_9_cast_fp16)[name = string("x_129_cast_fp16")];
tensor<int32, [4]> var_1100 = const()[name = string("op_1100"), val = tensor<int32, [4]>([1, 3, 1, 1])];
tensor<fp16, [8, 3, 1024, 128]> x_131_cast_fp16 = tile(reps = var_1100, x = x_129_cast_fp16)[name = string("x_131_cast_fp16")];
tensor<int32, [4]> var_1104 = const()[name = string("op_1104"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
tensor<fp16, [1, 24, 1024, 128]> value_states_19_cast_fp16 = reshape(shape = var_1104, x = x_131_cast_fp16)[name = string("value_states_19_cast_fp16")];
bool var_1107_transpose_x_1 = const()[name = string("op_1107_transpose_x_1"), val = bool(false)];
bool var_1107_transpose_y_1 = const()[name = string("op_1107_transpose_y_1"), val = bool(true)];
tensor<fp16, [1, 24, 1, 1024]> var_1107_cast_fp16 = matmul(transpose_x = var_1107_transpose_x_1, transpose_y = var_1107_transpose_y_1, x = rotated_17_cast_fp16, y = key_states_19_cast_fp16)[name = string("op_1107_cast_fp16")];
fp16 var_1108_to_fp16 = const()[name = string("op_1108_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 24, 1, 1024]> attn_weights_17_cast_fp16 = mul(x = var_1107_cast_fp16, y = var_1108_to_fp16)[name = string("attn_weights_17_cast_fp16")];
tensor<fp16, [1, 24, 1, 1024]> x_133_cast_fp16 = add(x = attn_weights_17_cast_fp16, y = causal_mask)[name = string("x_133_cast_fp16")];
tensor<int32, [1]> reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor<int32, [1]>([-1])];
bool reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 24, 1, 1]> reduce_max_4_cast_fp16 = reduce_max(axes = reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = x_133_cast_fp16)[name = string("reduce_max_4_cast_fp16")];
tensor<fp16, [1, 24, 1, 1024]> x_135_cast_fp16 = sub(x = x_133_cast_fp16, y = reduce_max_4_cast_fp16)[name = string("x_135_cast_fp16")];
tensor<fp16, [1, 24, 1, 1024]> exp_x_9_cast_fp16 = exp(x = x_135_cast_fp16)[name = string("exp_x_9_cast_fp16")];
tensor<int32, [1]> var_1119_axes_0 = const()[name = string("op_1119_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_1119_keep_dims_0 = const()[name = string("op_1119_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 24, 1, 1]> var_1119_cast_fp16 = reduce_sum(axes = var_1119_axes_0, keep_dims = var_1119_keep_dims_0, x = exp_x_9_cast_fp16)[name = string("op_1119_cast_fp16")];
tensor<fp16, [1, 24, 1, 1024]> attn_weights_19_cast_fp16 = real_div(x = exp_x_9_cast_fp16, y = var_1119_cast_fp16)[name = string("attn_weights_19_cast_fp16")];
bool attn_output_25_transpose_x_0 = const()[name = string("attn_output_25_transpose_x_0"), val = bool(false)];
bool attn_output_25_transpose_y_0 = const()[name = string("attn_output_25_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 24, 1, 128]> attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_0, transpose_y = attn_output_25_transpose_y_0, x = attn_weights_19_cast_fp16, y = value_states_19_cast_fp16)[name = string("attn_output_25_cast_fp16")];
tensor<int32, [4]> var_1122_perm_0 = const()[name = string("op_1122_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1124 = const()[name = string("op_1124"), val = tensor<int32, [3]>([1, 1, 3072])];
tensor<fp16, [1, 1, 24, 128]> var_1122_cast_fp16 = transpose(perm = var_1122_perm_0, x = attn_output_25_cast_fp16)[name = string("transpose_10")];
tensor<fp16, [1, 1, 3072]> input_61_cast_fp16 = reshape(shape = var_1124, x = var_1122_cast_fp16)[name = string("input_61_cast_fp16")];
tensor<fp16, [3072, 3072]> model_model_layers_18_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406080064))), lut = tensor<fp16, [384, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410798720))))[name = string("model_model_layers_18_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
tensor<fp16, [1, 1, 3072]> linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_18_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_61_cast_fp16)[name = string("linear_4_cast_fp16")];
tensor<fp16, [1, 1, 3072]> hidden_states_37_cast_fp16 = add(x = hidden_states_33_cast_fp16, y = linear_4_cast_fp16)[name = string("hidden_states_37_cast_fp16")];
tensor<int32, [1]> mean_19_axes_0 = const()[name = string("mean_19_axes_0"), val = tensor<int32, [1]>([-1])];
bool mean_19_keep_dims_0 = const()[name = string("mean_19_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 1, 1]> mean_19_cast_fp16 = reduce_mean(axes = mean_19_axes_0, keep_dims = mean_19_keep_dims_0, x = hidden_states_37_cast_fp16)[name = string("mean_19_cast_fp16")];
tensor<fp16, [1, 1, 3072]> input_63_cast_fp16 = sub(x = hidden_states_37_cast_fp16, y = mean_19_cast_fp16)[name = string("input_63_cast_fp16")];
tensor<int32, [1]> var_1135_axes_0 = const()[name = string("op_1135_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [3072]> model_model_layers_18_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_18_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410811072)))];
tensor<fp16, [1, 1, 3072]> var_1135_cast_fp16 = layer_norm(axes = var_1135_axes_0, epsilon = var_47_to_fp16, gamma = model_model_layers_18_post_attention_layernorm_weight_to_fp16, x = input_63_cast_fp16)[name = string("op_1135_cast_fp16")];
tensor<int32, [3]> var_1142 = const()[name = string("op_1142"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [1]> input_65_axes_0 = const()[name = string("input_65_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 1]> var_1143 = transpose(perm = var_1142, x = var_1135_cast_fp16)[name = string("transpose_9")];
tensor<fp16, [1, 3072, 1, 1]> input_65 = expand_dims(axes = input_65_axes_0, x = var_1143)[name = string("input_65")];
string input_67_pad_type_0 = const()[name = string("input_67_pad_type_0"), val = string("valid")];
tensor<int32, [2]> input_67_strides_0 = const()[name = string("input_67_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> input_67_pad_0 = const()[name = string("input_67_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> input_67_dilations_0 = const()[name = string("input_67_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 input_67_groups_0 = const()[name = string("input_67_groups_0"), val = int32(1)];
tensor<fp16, [1, 8192, 1, 1]> input_67 = conv(dilations = input_67_dilations_0, groups = input_67_groups_0, pad = input_67_pad_0, pad_type = input_67_pad_type_0, strides = input_67_strides_0, weight = model_model_layers_18_mlp_gate_proj_weight_palettized, x = input_65)[name = string("input_67")];
string up_states_9_pad_type_0 = const()[name = string("up_states_9_pad_type_0"), val = string("valid")];
tensor<int32, [2]> up_states_9_strides_0 = const()[name = string("up_states_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> up_states_9_pad_0 = const()[name = string("up_states_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> up_states_9_dilations_0 = const()[name = string("up_states_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 up_states_9_groups_0 = const()[name = string("up_states_9_groups_0"), val = int32(1)];
tensor<fp16, [1, 8192, 1, 1]> up_states_9 = conv(dilations = up_states_9_dilations_0, groups = up_states_9_groups_0, pad = up_states_9_pad_0, pad_type = up_states_9_pad_type_0, strides = up_states_9_strides_0, weight = model_model_layers_18_mlp_up_proj_weight_palettized, x = input_65)[name = string("up_states_9")];
tensor<fp16, [1, 8192, 1, 1]> gate_states_9 = silu(x = input_67)[name = string("gate_states_9")];
tensor<fp16, [1, 8192, 1, 1]> input_69 = mul(x = gate_states_9, y = up_states_9)[name = string("input_69")];
string hidden_states_39_pad_type_0 = const()[name = string("hidden_states_39_pad_type_0"), val = string("valid")];
tensor<int32, [2]> hidden_states_39_strides_0 = const()[name = string("hidden_states_39_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> hidden_states_39_pad_0 = const()[name = string("hidden_states_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> hidden_states_39_dilations_0 = const()[name = string("hidden_states_39_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 hidden_states_39_groups_0 = const()[name = string("hidden_states_39_groups_0"), val = int32(1)];
tensor<fp16, [1, 3072, 1, 1]> hidden_states_39 = conv(dilations = hidden_states_39_dilations_0, groups = hidden_states_39_groups_0, pad = hidden_states_39_pad_0, pad_type = hidden_states_39_pad_type_0, strides = hidden_states_39_strides_0, weight = model_model_layers_18_mlp_down_proj_weight_palettized, x = input_69)[name = string("hidden_states_39")];
tensor<int32, [1]> var_1165_axes_0 = const()[name = string("op_1165_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 1]> var_1165 = squeeze(axes = var_1165_axes_0, x = hidden_states_39)[name = string("op_1165")];
tensor<int32, [3]> var_1166 = const()[name = string("op_1166"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 1, 3072]> var_1167 = transpose(perm = var_1166, x = var_1165)[name = string("transpose_8")];
tensor<fp16, [1, 1, 3072]> hidden_states_41_cast_fp16 = add(x = hidden_states_37_cast_fp16, y = var_1167)[name = string("hidden_states_41_cast_fp16")];
tensor<int32, [1]> mean_21_axes_0 = const()[name = string("mean_21_axes_0"), val = tensor<int32, [1]>([-1])];
bool mean_21_keep_dims_0 = const()[name = string("mean_21_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 1, 1]> mean_21_cast_fp16 = reduce_mean(axes = mean_21_axes_0, keep_dims = mean_21_keep_dims_0, x = hidden_states_41_cast_fp16)[name = string("mean_21_cast_fp16")];
tensor<fp16, [1, 1, 3072]> input_71_cast_fp16 = sub(x = hidden_states_41_cast_fp16, y = mean_21_cast_fp16)[name = string("input_71_cast_fp16")];
tensor<int32, [1]> var_1175_axes_0 = const()[name = string("op_1175_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [3072]> model_model_layers_19_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_19_input_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410817280)))];
tensor<fp16, [1, 1, 3072]> var_1175_cast_fp16 = layer_norm(axes = var_1175_axes_0, epsilon = var_47_to_fp16, gamma = model_model_layers_19_input_layernorm_weight_to_fp16, x = input_71_cast_fp16)[name = string("op_1175_cast_fp16")];
tensor<int32, [3]> var_1178 = const()[name = string("op_1178"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [1]> var_1180_axes_0 = const()[name = string("op_1180_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 1]> var_1179 = transpose(perm = var_1178, x = var_1175_cast_fp16)[name = string("transpose_7")];
tensor<fp16, [1, 3072, 1, 1]> var_1180 = expand_dims(axes = var_1180_axes_0, x = var_1179)[name = string("op_1180")];
string var_1187_pad_type_0 = const()[name = string("op_1187_pad_type_0"), val = string("valid")];
tensor<int32, [2]> var_1187_strides_0 = const()[name = string("op_1187_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> var_1187_pad_0 = const()[name = string("op_1187_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> var_1187_dilations_0 = const()[name = string("op_1187_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 var_1187_groups_0 = const()[name = string("op_1187_groups_0"), val = int32(1)];
tensor<fp16, [1, 3072, 1, 1]> var_1187 = conv(dilations = var_1187_dilations_0, groups = var_1187_groups_0, pad = var_1187_pad_0, pad_type = var_1187_pad_type_0, strides = var_1187_strides_0, weight = model_model_layers_19_self_attn_q_proj_weight_palettized, x = var_1180)[name = string("op_1187")];
tensor<int32, [4]> var_1188 = const()[name = string("op_1188"), val = tensor<int32, [4]>([1, 24, 1, 128])];
tensor<fp16, [1, 24, 1, 128]> var_1189 = reshape(shape = var_1188, x = var_1187)[name = string("op_1189")];
string var_1196_pad_type_0 = const()[name = string("op_1196_pad_type_0"), val = string("valid")];
tensor<int32, [2]> var_1196_strides_0 = const()[name = string("op_1196_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> var_1196_pad_0 = const()[name = string("op_1196_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> var_1196_dilations_0 = const()[name = string("op_1196_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 var_1196_groups_0 = const()[name = string("op_1196_groups_0"), val = int32(1)];
tensor<fp16, [1, 1024, 1, 1]> var_1196 = conv(dilations = var_1196_dilations_0, groups = var_1196_groups_0, pad = var_1196_pad_0, pad_type = var_1196_pad_type_0, strides = var_1196_strides_0, weight = model_model_layers_19_self_attn_k_proj_weight_palettized, x = var_1180)[name = string("op_1196")];
tensor<int32, [4]> var_1197 = const()[name = string("op_1197"), val = tensor<int32, [4]>([1, 8, 1, 128])];
tensor<fp16, [1, 8, 1, 128]> var_1198 = reshape(shape = var_1197, x = var_1196)[name = string("op_1198")];
string var_1205_pad_type_0 = const()[name = string("op_1205_pad_type_0"), val = string("valid")];
tensor<int32, [2]> var_1205_strides_0 = const()[name = string("op_1205_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> var_1205_pad_0 = const()[name = string("op_1205_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> var_1205_dilations_0 = const()[name = string("op_1205_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 var_1205_groups_0 = const()[name = string("op_1205_groups_0"), val = int32(1)];
tensor<fp16, [1, 1024, 1, 1]> var_1205 = conv(dilations = var_1205_dilations_0, groups = var_1205_groups_0, pad = var_1205_pad_0, pad_type = var_1205_pad_type_0, strides = var_1205_strides_0, weight = model_model_layers_19_self_attn_v_proj_weight_palettized, x = var_1180)[name = string("op_1205")];
tensor<int32, [4]> var_1206 = const()[name = string("op_1206"), val = tensor<int32, [4]>([1, 8, 1, 128])];
tensor<fp16, [1, 8, 1, 128]> var_1207 = reshape(shape = var_1206, x = var_1205)[name = string("op_1207")];
tensor<int32, [4]> x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor<int32, [4]>([1, 24, 1, 64])];
tensor<bool, [4]> x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 24, 1, 64]> x1_21 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = var_1189)[name = string("x1_21")];
tensor<int32, [4]> x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor<int32, [4]>([1, 24, 1, 128])];
tensor<bool, [4]> x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 24, 1, 64]> x2_21 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = var_1189)[name = string("x2_21")];
tensor<fp16, [1, 24, 1, 64]> var_1221_cast_fp16 = mul(x = x1_21, y = cos_3_cast_fp16)[name = string("op_1221_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_1222_cast_fp16 = mul(x = x2_21, y = sin_3_cast_fp16)[name = string("op_1222_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_1223_cast_fp16 = sub(x = var_1221_cast_fp16, y = var_1222_cast_fp16)[name = string("op_1223_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_1224_cast_fp16 = mul(x = x2_21, y = cos_3_cast_fp16)[name = string("op_1224_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_1225_cast_fp16 = mul(x = x1_21, y = sin_3_cast_fp16)[name = string("op_1225_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_1226_cast_fp16 = add(x = var_1224_cast_fp16, y = var_1225_cast_fp16)[name = string("op_1226_cast_fp16")];
bool rotated_21_interleave_0 = const()[name = string("rotated_21_interleave_0"), val = bool(false)];
tensor<fp16, [1, 24, 1, 128]> rotated_21_cast_fp16 = concat(axis = var_52, interleave = rotated_21_interleave_0, values = (var_1223_cast_fp16, var_1226_cast_fp16))[name = string("rotated_21_cast_fp16")];
tensor<int32, [4]> x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor<int32, [4]>([1, 8, 1, 64])];
tensor<bool, [4]> x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 8, 1, 64]> x1_23 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = var_1198)[name = string("x1_23")];
tensor<int32, [4]> x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor<int32, [4]>([1, 8, 1, 128])];
tensor<bool, [4]> x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 8, 1, 64]> x2_23 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = var_1198)[name = string("x2_23")];
tensor<fp16, [1, 8, 1, 64]> var_1242_cast_fp16 = mul(x = x1_23, y = cos_3_cast_fp16)[name = string("op_1242_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_1243_cast_fp16 = mul(x = x2_23, y = sin_3_cast_fp16)[name = string("op_1243_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_1244_cast_fp16 = sub(x = var_1242_cast_fp16, y = var_1243_cast_fp16)[name = string("op_1244_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_1245_cast_fp16 = mul(x = x2_23, y = cos_3_cast_fp16)[name = string("op_1245_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_1246_cast_fp16 = mul(x = x1_23, y = sin_3_cast_fp16)[name = string("op_1246_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_1247_cast_fp16 = add(x = var_1245_cast_fp16, y = var_1246_cast_fp16)[name = string("op_1247_cast_fp16")];
bool rotated_23_interleave_0 = const()[name = string("rotated_23_interleave_0"), val = bool(false)];
tensor<fp16, [1, 8, 1, 128]> rotated_23_cast_fp16 = concat(axis = var_52, interleave = rotated_23_interleave_0, values = (var_1244_cast_fp16, var_1247_cast_fp16))[name = string("rotated_23_cast_fp16")];
tensor<int32, [1]> expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor<int32, [1]>([19])];
tensor<int32, [1]> expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor<int32, [1]>([20])];
int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)];
bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (expand_dims_60, expand_dims_61, current_pos, expand_dims_63))[name = string("concat_42")];
tensor<int32, [1]> concat_43_values1_0 = const()[name = string("concat_43_values1_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor<int32, [1]>([0])];
int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)];
bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_64, concat_43_values1_0, var_327, concat_43_values3_0))[name = string("concat_43")];
tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_11_stride_0, update = rotated_23_cast_fp16, x = coreml_update_state_23)[name = string("model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16")];
write_state(data = model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_10_write_state")];
tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_24 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_10")];
tensor<int32, [1]> expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor<int32, [1]>([47])];
tensor<int32, [1]> expand_dims_67 = const()[name = string("expand_dims_67"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_69 = const()[name = string("expand_dims_69"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_70 = const()[name = string("expand_dims_70"), val = tensor<int32, [1]>([48])];
int32 concat_46_axis_0 = const()[name = string("concat_46_axis_0"), val = int32(0)];
bool concat_46_interleave_0 = const()[name = string("concat_46_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_46 = concat(axis = concat_46_axis_0, interleave = concat_46_interleave_0, values = (expand_dims_66, expand_dims_67, current_pos, expand_dims_69))[name = string("concat_46")];
tensor<int32, [1]> concat_47_values1_0 = const()[name = string("concat_47_values1_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_47_values3_0 = const()[name = string("concat_47_values3_0"), val = tensor<int32, [1]>([0])];
int32 concat_47_axis_0 = const()[name = string("concat_47_axis_0"), val = int32(0)];
bool concat_47_interleave_0 = const()[name = string("concat_47_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_47 = concat(axis = concat_47_axis_0, interleave = concat_47_interleave_0, values = (expand_dims_70, concat_47_values1_0, var_327, concat_47_values3_0))[name = string("concat_47")];
tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_46, begin_mask = model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0, end = concat_47, end_mask = model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_12_stride_0, update = var_1207, x = coreml_update_state_24)[name = string("model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16")];
write_state(data = model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_11_write_state")];
tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_25 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_11")];
tensor<int32, [4]> var_1267_begin_0 = const()[name = string("op_1267_begin_0"), val = tensor<int32, [4]>([19, 0, 0, 0])];
tensor<int32, [4]> var_1267_end_0 = const()[name = string("op_1267_end_0"), val = tensor<int32, [4]>([20, 8, 1024, 128])];
tensor<bool, [4]> var_1267_end_mask_0 = const()[name = string("op_1267_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<fp16, [1, 8, 1024, 128]> var_1267_cast_fp16 = slice_by_index(begin = var_1267_begin_0, end = var_1267_end_0, end_mask = var_1267_end_mask_0, x = coreml_update_state_25)[name = string("op_1267_cast_fp16")];
tensor<int32, [1]> K_layer_cache_11_axes_0 = const()[name = string("K_layer_cache_11_axes_0"), val = tensor<int32, [1]>([0])];
tensor<fp16, [8, 1024, 128]> K_layer_cache_11_cast_fp16 = squeeze(axes = K_layer_cache_11_axes_0, x = var_1267_cast_fp16)[name = string("K_layer_cache_11_cast_fp16")];
tensor<int32, [4]> var_1269_begin_0 = const()[name = string("op_1269_begin_0"), val = tensor<int32, [4]>([47, 0, 0, 0])];
tensor<int32, [4]> var_1269_end_0 = const()[name = string("op_1269_end_0"), val = tensor<int32, [4]>([48, 8, 1024, 128])];
tensor<bool, [4]> var_1269_end_mask_0 = const()[name = string("op_1269_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<fp16, [1, 8, 1024, 128]> var_1269_cast_fp16 = slice_by_index(begin = var_1269_begin_0, end = var_1269_end_0, end_mask = var_1269_end_mask_0, x = coreml_update_state_25)[name = string("op_1269_cast_fp16")];
tensor<int32, [1]> V_layer_cache_11_axes_0 = const()[name = string("V_layer_cache_11_axes_0"), val = tensor<int32, [1]>([0])];
tensor<fp16, [8, 1024, 128]> V_layer_cache_11_cast_fp16 = squeeze(axes = V_layer_cache_11_axes_0, x = var_1269_cast_fp16)[name = string("V_layer_cache_11_cast_fp16")];
tensor<int32, [1]> x_151_axes_0 = const()[name = string("x_151_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [8, 1, 1024, 128]> x_151_cast_fp16 = expand_dims(axes = x_151_axes_0, x = K_layer_cache_11_cast_fp16)[name = string("x_151_cast_fp16")];
tensor<int32, [4]> var_1278 = const()[name = string("op_1278"), val = tensor<int32, [4]>([1, 3, 1, 1])];
tensor<fp16, [8, 3, 1024, 128]> x_153_cast_fp16 = tile(reps = var_1278, x = x_151_cast_fp16)[name = string("x_153_cast_fp16")];
tensor<int32, [4]> var_1282 = const()[name = string("op_1282"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
tensor<fp16, [1, 24, 1024, 128]> key_states_23_cast_fp16 = reshape(shape = var_1282, x = x_153_cast_fp16)[name = string("key_states_23_cast_fp16")];
tensor<int32, [1]> x_157_axes_0 = const()[name = string("x_157_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [8, 1, 1024, 128]> x_157_cast_fp16 = expand_dims(axes = x_157_axes_0, x = V_layer_cache_11_cast_fp16)[name = string("x_157_cast_fp16")];
tensor<int32, [4]> var_1285 = const()[name = string("op_1285"), val = tensor<int32, [4]>([1, 3, 1, 1])];
tensor<fp16, [8, 3, 1024, 128]> x_159_cast_fp16 = tile(reps = var_1285, x = x_157_cast_fp16)[name = string("x_159_cast_fp16")];
tensor<int32, [4]> var_1289 = const()[name = string("op_1289"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
tensor<fp16, [1, 24, 1024, 128]> value_states_23_cast_fp16 = reshape(shape = var_1289, x = x_159_cast_fp16)[name = string("value_states_23_cast_fp16")];
bool var_1292_transpose_x_1 = const()[name = string("op_1292_transpose_x_1"), val = bool(false)];
bool var_1292_transpose_y_1 = const()[name = string("op_1292_transpose_y_1"), val = bool(true)];
tensor<fp16, [1, 24, 1, 1024]> var_1292_cast_fp16 = matmul(transpose_x = var_1292_transpose_x_1, transpose_y = var_1292_transpose_y_1, x = rotated_21_cast_fp16, y = key_states_23_cast_fp16)[name = string("op_1292_cast_fp16")];
fp16 var_1293_to_fp16 = const()[name = string("op_1293_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 24, 1, 1024]> attn_weights_21_cast_fp16 = mul(x = var_1292_cast_fp16, y = var_1293_to_fp16)[name = string("attn_weights_21_cast_fp16")];
tensor<fp16, [1, 24, 1, 1024]> x_161_cast_fp16 = add(x = attn_weights_21_cast_fp16, y = causal_mask)[name = string("x_161_cast_fp16")];
tensor<int32, [1]> reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor<int32, [1]>([-1])];
bool reduce_max_5_keep_dims_0 = const()[name = string("reduce_max_5_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 24, 1, 1]> reduce_max_5_cast_fp16 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = x_161_cast_fp16)[name = string("reduce_max_5_cast_fp16")];
tensor<fp16, [1, 24, 1, 1024]> x_163_cast_fp16 = sub(x = x_161_cast_fp16, y = reduce_max_5_cast_fp16)[name = string("x_163_cast_fp16")];
tensor<fp16, [1, 24, 1, 1024]> exp_x_11_cast_fp16 = exp(x = x_163_cast_fp16)[name = string("exp_x_11_cast_fp16")];
tensor<int32, [1]> var_1304_axes_0 = const()[name = string("op_1304_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_1304_keep_dims_0 = const()[name = string("op_1304_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 24, 1, 1]> var_1304_cast_fp16 = reduce_sum(axes = var_1304_axes_0, keep_dims = var_1304_keep_dims_0, x = exp_x_11_cast_fp16)[name = string("op_1304_cast_fp16")];
tensor<fp16, [1, 24, 1, 1024]> attn_weights_23_cast_fp16 = real_div(x = exp_x_11_cast_fp16, y = var_1304_cast_fp16)[name = string("attn_weights_23_cast_fp16")];
bool attn_output_31_transpose_x_0 = const()[name = string("attn_output_31_transpose_x_0"), val = bool(false)];
bool attn_output_31_transpose_y_0 = const()[name = string("attn_output_31_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 24, 1, 128]> attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_0, transpose_y = attn_output_31_transpose_y_0, x = attn_weights_23_cast_fp16, y = value_states_23_cast_fp16)[name = string("attn_output_31_cast_fp16")];
tensor<int32, [4]> var_1307_perm_0 = const()[name = string("op_1307_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1309 = const()[name = string("op_1309"), val = tensor<int32, [3]>([1, 1, 3072])];
tensor<fp16, [1, 1, 24, 128]> var_1307_cast_fp16 = transpose(perm = var_1307_perm_0, x = attn_output_31_cast_fp16)[name = string("transpose_6")];
tensor<fp16, [1, 1, 3072]> input_75_cast_fp16 = reshape(shape = var_1309, x = var_1307_cast_fp16)[name = string("input_75_cast_fp16")];
tensor<fp16, [3072, 3072]> model_model_layers_19_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410823488))), lut = tensor<fp16, [384, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415542144))))[name = string("model_model_layers_19_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
tensor<fp16, [1, 1, 3072]> linear_5_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_19_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("linear_5_cast_fp16")];
tensor<fp16, [1, 1, 3072]> hidden_states_45_cast_fp16 = add(x = hidden_states_41_cast_fp16, y = linear_5_cast_fp16)[name = string("hidden_states_45_cast_fp16")];
tensor<int32, [1]> mean_23_axes_0 = const()[name = string("mean_23_axes_0"), val = tensor<int32, [1]>([-1])];
bool mean_23_keep_dims_0 = const()[name = string("mean_23_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 1, 1]> mean_23_cast_fp16 = reduce_mean(axes = mean_23_axes_0, keep_dims = mean_23_keep_dims_0, x = hidden_states_45_cast_fp16)[name = string("mean_23_cast_fp16")];
tensor<fp16, [1, 1, 3072]> input_77_cast_fp16 = sub(x = hidden_states_45_cast_fp16, y = mean_23_cast_fp16)[name = string("input_77_cast_fp16")];
tensor<int32, [1]> var_1320_axes_0 = const()[name = string("op_1320_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [3072]> model_model_layers_19_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_19_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415554496)))];
tensor<fp16, [1, 1, 3072]> var_1320_cast_fp16 = layer_norm(axes = var_1320_axes_0, epsilon = var_47_to_fp16, gamma = model_model_layers_19_post_attention_layernorm_weight_to_fp16, x = input_77_cast_fp16)[name = string("op_1320_cast_fp16")];
tensor<int32, [3]> var_1327 = const()[name = string("op_1327"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [1]> input_79_axes_0 = const()[name = string("input_79_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 1]> var_1328 = transpose(perm = var_1327, x = var_1320_cast_fp16)[name = string("transpose_5")];
tensor<fp16, [1, 3072, 1, 1]> input_79 = expand_dims(axes = input_79_axes_0, x = var_1328)[name = string("input_79")];
string input_81_pad_type_0 = const()[name = string("input_81_pad_type_0"), val = string("valid")];
tensor<int32, [2]> input_81_strides_0 = const()[name = string("input_81_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> input_81_pad_0 = const()[name = string("input_81_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> input_81_dilations_0 = const()[name = string("input_81_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 input_81_groups_0 = const()[name = string("input_81_groups_0"), val = int32(1)];
tensor<fp16, [1, 8192, 1, 1]> input_81 = conv(dilations = input_81_dilations_0, groups = input_81_groups_0, pad = input_81_pad_0, pad_type = input_81_pad_type_0, strides = input_81_strides_0, weight = model_model_layers_19_mlp_gate_proj_weight_palettized, x = input_79)[name = string("input_81")];
string up_states_11_pad_type_0 = const()[name = string("up_states_11_pad_type_0"), val = string("valid")];
tensor<int32, [2]> up_states_11_strides_0 = const()[name = string("up_states_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> up_states_11_pad_0 = const()[name = string("up_states_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> up_states_11_dilations_0 = const()[name = string("up_states_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 up_states_11_groups_0 = const()[name = string("up_states_11_groups_0"), val = int32(1)];
tensor<fp16, [1, 8192, 1, 1]> up_states_11 = conv(dilations = up_states_11_dilations_0, groups = up_states_11_groups_0, pad = up_states_11_pad_0, pad_type = up_states_11_pad_type_0, strides = up_states_11_strides_0, weight = model_model_layers_19_mlp_up_proj_weight_palettized, x = input_79)[name = string("up_states_11")];
tensor<fp16, [1, 8192, 1, 1]> gate_states_11 = silu(x = input_81)[name = string("gate_states_11")];
tensor<fp16, [1, 8192, 1, 1]> input_83 = mul(x = gate_states_11, y = up_states_11)[name = string("input_83")];
string hidden_states_47_pad_type_0 = const()[name = string("hidden_states_47_pad_type_0"), val = string("valid")];
tensor<int32, [2]> hidden_states_47_strides_0 = const()[name = string("hidden_states_47_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> hidden_states_47_pad_0 = const()[name = string("hidden_states_47_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> hidden_states_47_dilations_0 = const()[name = string("hidden_states_47_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 hidden_states_47_groups_0 = const()[name = string("hidden_states_47_groups_0"), val = int32(1)];
tensor<fp16, [1, 3072, 1, 1]> hidden_states_47 = conv(dilations = hidden_states_47_dilations_0, groups = hidden_states_47_groups_0, pad = hidden_states_47_pad_0, pad_type = hidden_states_47_pad_type_0, strides = hidden_states_47_strides_0, weight = model_model_layers_19_mlp_down_proj_weight_palettized, x = input_83)[name = string("hidden_states_47")];
tensor<int32, [1]> var_1350_axes_0 = const()[name = string("op_1350_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 1]> var_1350 = squeeze(axes = var_1350_axes_0, x = hidden_states_47)[name = string("op_1350")];
tensor<int32, [3]> var_1351 = const()[name = string("op_1351"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 1, 3072]> var_1352 = transpose(perm = var_1351, x = var_1350)[name = string("transpose_4")];
tensor<fp16, [1, 1, 3072]> hidden_states_49_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = var_1352)[name = string("hidden_states_49_cast_fp16")];
tensor<int32, [1]> mean_25_axes_0 = const()[name = string("mean_25_axes_0"), val = tensor<int32, [1]>([-1])];
bool mean_25_keep_dims_0 = const()[name = string("mean_25_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 1, 1]> mean_25_cast_fp16 = reduce_mean(axes = mean_25_axes_0, keep_dims = mean_25_keep_dims_0, x = hidden_states_49_cast_fp16)[name = string("mean_25_cast_fp16")];
tensor<fp16, [1, 1, 3072]> input_85_cast_fp16 = sub(x = hidden_states_49_cast_fp16, y = mean_25_cast_fp16)[name = string("input_85_cast_fp16")];
tensor<int32, [1]> var_1360_axes_0 = const()[name = string("op_1360_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [3072]> model_model_layers_20_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_20_input_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415560704)))];
tensor<fp16, [1, 1, 3072]> var_1360_cast_fp16 = layer_norm(axes = var_1360_axes_0, epsilon = var_47_to_fp16, gamma = model_model_layers_20_input_layernorm_weight_to_fp16, x = input_85_cast_fp16)[name = string("op_1360_cast_fp16")];
tensor<int32, [3]> var_1363 = const()[name = string("op_1363"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [1]> var_1365_axes_0 = const()[name = string("op_1365_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 1]> var_1364 = transpose(perm = var_1363, x = var_1360_cast_fp16)[name = string("transpose_3")];
tensor<fp16, [1, 3072, 1, 1]> var_1365 = expand_dims(axes = var_1365_axes_0, x = var_1364)[name = string("op_1365")];
string var_1372_pad_type_0 = const()[name = string("op_1372_pad_type_0"), val = string("valid")];
tensor<int32, [2]> var_1372_strides_0 = const()[name = string("op_1372_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> var_1372_pad_0 = const()[name = string("op_1372_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> var_1372_dilations_0 = const()[name = string("op_1372_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 var_1372_groups_0 = const()[name = string("op_1372_groups_0"), val = int32(1)];
tensor<fp16, [1, 3072, 1, 1]> var_1372 = conv(dilations = var_1372_dilations_0, groups = var_1372_groups_0, pad = var_1372_pad_0, pad_type = var_1372_pad_type_0, strides = var_1372_strides_0, weight = model_model_layers_20_self_attn_q_proj_weight_palettized, x = var_1365)[name = string("op_1372")];
tensor<int32, [4]> var_1373 = const()[name = string("op_1373"), val = tensor<int32, [4]>([1, 24, 1, 128])];
tensor<fp16, [1, 24, 1, 128]> var_1374 = reshape(shape = var_1373, x = var_1372)[name = string("op_1374")];
string var_1381_pad_type_0 = const()[name = string("op_1381_pad_type_0"), val = string("valid")];
tensor<int32, [2]> var_1381_strides_0 = const()[name = string("op_1381_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> var_1381_pad_0 = const()[name = string("op_1381_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> var_1381_dilations_0 = const()[name = string("op_1381_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 var_1381_groups_0 = const()[name = string("op_1381_groups_0"), val = int32(1)];
tensor<fp16, [1, 1024, 1, 1]> var_1381 = conv(dilations = var_1381_dilations_0, groups = var_1381_groups_0, pad = var_1381_pad_0, pad_type = var_1381_pad_type_0, strides = var_1381_strides_0, weight = model_model_layers_20_self_attn_k_proj_weight_palettized, x = var_1365)[name = string("op_1381")];
tensor<int32, [4]> var_1382 = const()[name = string("op_1382"), val = tensor<int32, [4]>([1, 8, 1, 128])];
tensor<fp16, [1, 8, 1, 128]> var_1383 = reshape(shape = var_1382, x = var_1381)[name = string("op_1383")];
string var_1390_pad_type_0 = const()[name = string("op_1390_pad_type_0"), val = string("valid")];
tensor<int32, [2]> var_1390_strides_0 = const()[name = string("op_1390_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> var_1390_pad_0 = const()[name = string("op_1390_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> var_1390_dilations_0 = const()[name = string("op_1390_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 var_1390_groups_0 = const()[name = string("op_1390_groups_0"), val = int32(1)];
tensor<fp16, [1, 1024, 1, 1]> var_1390 = conv(dilations = var_1390_dilations_0, groups = var_1390_groups_0, pad = var_1390_pad_0, pad_type = var_1390_pad_type_0, strides = var_1390_strides_0, weight = model_model_layers_20_self_attn_v_proj_weight_palettized, x = var_1365)[name = string("op_1390")];
tensor<int32, [4]> var_1391 = const()[name = string("op_1391"), val = tensor<int32, [4]>([1, 8, 1, 128])];
tensor<fp16, [1, 8, 1, 128]> var_1392 = reshape(shape = var_1391, x = var_1390)[name = string("op_1392")];
tensor<int32, [4]> x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor<int32, [4]>([1, 24, 1, 64])];
tensor<bool, [4]> x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 24, 1, 64]> x1_25 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = var_1374)[name = string("x1_25")];
tensor<int32, [4]> x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor<int32, [4]>([1, 24, 1, 128])];
tensor<bool, [4]> x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 24, 1, 64]> x2_25 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = var_1374)[name = string("x2_25")];
tensor<fp16, [1, 24, 1, 64]> var_1406_cast_fp16 = mul(x = x1_25, y = cos_3_cast_fp16)[name = string("op_1406_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_1407_cast_fp16 = mul(x = x2_25, y = sin_3_cast_fp16)[name = string("op_1407_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_1408_cast_fp16 = sub(x = var_1406_cast_fp16, y = var_1407_cast_fp16)[name = string("op_1408_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_1409_cast_fp16 = mul(x = x2_25, y = cos_3_cast_fp16)[name = string("op_1409_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_1410_cast_fp16 = mul(x = x1_25, y = sin_3_cast_fp16)[name = string("op_1410_cast_fp16")];
tensor<fp16, [1, 24, 1, 64]> var_1411_cast_fp16 = add(x = var_1409_cast_fp16, y = var_1410_cast_fp16)[name = string("op_1411_cast_fp16")];
bool rotated_25_interleave_0 = const()[name = string("rotated_25_interleave_0"), val = bool(false)];
tensor<fp16, [1, 24, 1, 128]> rotated_25_cast_fp16 = concat(axis = var_52, interleave = rotated_25_interleave_0, values = (var_1408_cast_fp16, var_1411_cast_fp16))[name = string("rotated_25_cast_fp16")];
tensor<int32, [4]> x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_end_0 = const()[name = string("x1_end_0"), val = tensor<int32, [4]>([1, 8, 1, 64])];
tensor<bool, [4]> x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 8, 1, 64]> x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = var_1383)[name = string("x1")];
tensor<int32, [4]> x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_end_0 = const()[name = string("x2_end_0"), val = tensor<int32, [4]>([1, 8, 1, 128])];
tensor<bool, [4]> x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 8, 1, 64]> x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = var_1383)[name = string("x2")];
tensor<fp16, [1, 8, 1, 64]> var_1427_cast_fp16 = mul(x = x1, y = cos_3_cast_fp16)[name = string("op_1427_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_1428_cast_fp16 = mul(x = x2, y = sin_3_cast_fp16)[name = string("op_1428_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_1429_cast_fp16 = sub(x = var_1427_cast_fp16, y = var_1428_cast_fp16)[name = string("op_1429_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_1430_cast_fp16 = mul(x = x2, y = cos_3_cast_fp16)[name = string("op_1430_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_1431_cast_fp16 = mul(x = x1, y = sin_3_cast_fp16)[name = string("op_1431_cast_fp16")];
tensor<fp16, [1, 8, 1, 64]> var_1432_cast_fp16 = add(x = var_1430_cast_fp16, y = var_1431_cast_fp16)[name = string("op_1432_cast_fp16")];
bool rotated_interleave_0 = const()[name = string("rotated_interleave_0"), val = bool(false)];
tensor<fp16, [1, 8, 1, 128]> rotated_cast_fp16 = concat(axis = var_52, interleave = rotated_interleave_0, values = (var_1429_cast_fp16, var_1432_cast_fp16))[name = string("rotated_cast_fp16")];
tensor<int32, [1]> expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor<int32, [1]>([20])];
tensor<int32, [1]> expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_76 = const()[name = string("expand_dims_76"), val = tensor<int32, [1]>([21])];
int32 concat_50_axis_0 = const()[name = string("concat_50_axis_0"), val = int32(0)];
bool concat_50_interleave_0 = const()[name = string("concat_50_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_50 = concat(axis = concat_50_axis_0, interleave = concat_50_interleave_0, values = (expand_dims_72, expand_dims_73, current_pos, expand_dims_75))[name = string("concat_50")];
tensor<int32, [1]> concat_51_values1_0 = const()[name = string("concat_51_values1_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_51_values3_0 = const()[name = string("concat_51_values3_0"), val = tensor<int32, [1]>([0])];
int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)];
bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (expand_dims_76, concat_51_values1_0, var_327, concat_51_values3_0))[name = string("concat_51")];
tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_50, begin_mask = model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0, end = concat_51, end_mask = model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_13_stride_0, update = rotated_cast_fp16, x = coreml_update_state_25)[name = string("model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16")];
write_state(data = model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_12_write_state")];
tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_26 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_12")];
tensor<int32, [1]> expand_dims_78 = const()[name = string("expand_dims_78"), val = tensor<int32, [1]>([48])];
tensor<int32, [1]> expand_dims_79 = const()[name = string("expand_dims_79"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor<int32, [1]>([49])];
int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)];
bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (expand_dims_78, expand_dims_79, current_pos, expand_dims_81))[name = string("concat_54")];
tensor<int32, [1]> concat_55_values1_0 = const()[name = string("concat_55_values1_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_55_values3_0 = const()[name = string("concat_55_values3_0"), val = tensor<int32, [1]>([0])];
int32 concat_55_axis_0 = const()[name = string("concat_55_axis_0"), val = int32(0)];
bool concat_55_interleave_0 = const()[name = string("concat_55_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_55 = concat(axis = concat_55_axis_0, interleave = concat_55_interleave_0, values = (expand_dims_82, concat_55_values1_0, var_327, concat_55_values3_0))[name = string("concat_55")];
tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_54, begin_mask = model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0, end = concat_55, end_mask = model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_14_stride_0, update = var_1392, x = coreml_update_state_26)[name = string("model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16")];
write_state(data = model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_13_write_state")];
tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_27 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_13")];
tensor<int32, [4]> var_1452_begin_0 = const()[name = string("op_1452_begin_0"), val = tensor<int32, [4]>([20, 0, 0, 0])];
tensor<int32, [4]> var_1452_end_0 = const()[name = string("op_1452_end_0"), val = tensor<int32, [4]>([21, 8, 1024, 128])];
tensor<bool, [4]> var_1452_end_mask_0 = const()[name = string("op_1452_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<fp16, [1, 8, 1024, 128]> var_1452_cast_fp16 = slice_by_index(begin = var_1452_begin_0, end = var_1452_end_0, end_mask = var_1452_end_mask_0, x = coreml_update_state_27)[name = string("op_1452_cast_fp16")];
tensor<int32, [1]> K_layer_cache_axes_0 = const()[name = string("K_layer_cache_axes_0"), val = tensor<int32, [1]>([0])];
tensor<fp16, [8, 1024, 128]> K_layer_cache_cast_fp16 = squeeze(axes = K_layer_cache_axes_0, x = var_1452_cast_fp16)[name = string("K_layer_cache_cast_fp16")];
tensor<int32, [4]> var_1454_begin_0 = const()[name = string("op_1454_begin_0"), val = tensor<int32, [4]>([48, 0, 0, 0])];
tensor<int32, [4]> var_1454_end_0 = const()[name = string("op_1454_end_0"), val = tensor<int32, [4]>([49, 8, 1024, 128])];
tensor<bool, [4]> var_1454_end_mask_0 = const()[name = string("op_1454_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<fp16, [1, 8, 1024, 128]> var_1454_cast_fp16 = slice_by_index(begin = var_1454_begin_0, end = var_1454_end_0, end_mask = var_1454_end_mask_0, x = coreml_update_state_27)[name = string("op_1454_cast_fp16")];
tensor<int32, [1]> V_layer_cache_axes_0 = const()[name = string("V_layer_cache_axes_0"), val = tensor<int32, [1]>([0])];
tensor<fp16, [8, 1024, 128]> V_layer_cache_cast_fp16 = squeeze(axes = V_layer_cache_axes_0, x = var_1454_cast_fp16)[name = string("V_layer_cache_cast_fp16")];
tensor<int32, [1]> x_179_axes_0 = const()[name = string("x_179_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [8, 1, 1024, 128]> x_179_cast_fp16 = expand_dims(axes = x_179_axes_0, x = K_layer_cache_cast_fp16)[name = string("x_179_cast_fp16")];
tensor<int32, [4]> var_1463 = const()[name = string("op_1463"), val = tensor<int32, [4]>([1, 3, 1, 1])];
tensor<fp16, [8, 3, 1024, 128]> x_181_cast_fp16 = tile(reps = var_1463, x = x_179_cast_fp16)[name = string("x_181_cast_fp16")];
tensor<int32, [4]> var_1467 = const()[name = string("op_1467"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
tensor<fp16, [1, 24, 1024, 128]> key_states_cast_fp16 = reshape(shape = var_1467, x = x_181_cast_fp16)[name = string("key_states_cast_fp16")];
tensor<int32, [1]> x_185_axes_0 = const()[name = string("x_185_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [8, 1, 1024, 128]> x_185_cast_fp16 = expand_dims(axes = x_185_axes_0, x = V_layer_cache_cast_fp16)[name = string("x_185_cast_fp16")];
tensor<int32, [4]> var_1470 = const()[name = string("op_1470"), val = tensor<int32, [4]>([1, 3, 1, 1])];
tensor<fp16, [8, 3, 1024, 128]> x_187_cast_fp16 = tile(reps = var_1470, x = x_185_cast_fp16)[name = string("x_187_cast_fp16")];
tensor<int32, [4]> var_1474 = const()[name = string("op_1474"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
tensor<fp16, [1, 24, 1024, 128]> value_states_cast_fp16 = reshape(shape = var_1474, x = x_187_cast_fp16)[name = string("value_states_cast_fp16")];
bool var_1477_transpose_x_1 = const()[name = string("op_1477_transpose_x_1"), val = bool(false)];
bool var_1477_transpose_y_1 = const()[name = string("op_1477_transpose_y_1"), val = bool(true)];
tensor<fp16, [1, 24, 1, 1024]> var_1477_cast_fp16 = matmul(transpose_x = var_1477_transpose_x_1, transpose_y = var_1477_transpose_y_1, x = rotated_25_cast_fp16, y = key_states_cast_fp16)[name = string("op_1477_cast_fp16")];
fp16 var_1478_to_fp16 = const()[name = string("op_1478_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 24, 1, 1024]> attn_weights_25_cast_fp16 = mul(x = var_1477_cast_fp16, y = var_1478_to_fp16)[name = string("attn_weights_25_cast_fp16")];
tensor<fp16, [1, 24, 1, 1024]> x_189_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = causal_mask)[name = string("x_189_cast_fp16")];
tensor<int32, [1]> reduce_max_6_axes_0 = const()[name = string("reduce_max_6_axes_0"), val = tensor<int32, [1]>([-1])];
bool reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 24, 1, 1]> reduce_max_6_cast_fp16 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = x_189_cast_fp16)[name = string("reduce_max_6_cast_fp16")];
tensor<fp16, [1, 24, 1, 1024]> x_191_cast_fp16 = sub(x = x_189_cast_fp16, y = reduce_max_6_cast_fp16)[name = string("x_191_cast_fp16")];
tensor<fp16, [1, 24, 1, 1024]> exp_x_cast_fp16 = exp(x = x_191_cast_fp16)[name = string("exp_x_cast_fp16")];
tensor<int32, [1]> var_1489_axes_0 = const()[name = string("op_1489_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_1489_keep_dims_0 = const()[name = string("op_1489_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 24, 1, 1]> var_1489_cast_fp16 = reduce_sum(axes = var_1489_axes_0, keep_dims = var_1489_keep_dims_0, x = exp_x_cast_fp16)[name = string("op_1489_cast_fp16")];
tensor<fp16, [1, 24, 1, 1024]> attn_weights_cast_fp16 = real_div(x = exp_x_cast_fp16, y = var_1489_cast_fp16)[name = string("attn_weights_cast_fp16")];
bool attn_output_37_transpose_x_0 = const()[name = string("attn_output_37_transpose_x_0"), val = bool(false)];
bool attn_output_37_transpose_y_0 = const()[name = string("attn_output_37_transpose_y_0"), val = bool(false)];
tensor<fp16, [1, 24, 1, 128]> attn_output_37_cast_fp16 = matmul(transpose_x = attn_output_37_transpose_x_0, transpose_y = attn_output_37_transpose_y_0, x = attn_weights_cast_fp16, y = value_states_cast_fp16)[name = string("attn_output_37_cast_fp16")];
tensor<int32, [4]> var_1492_perm_0 = const()[name = string("op_1492_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1494 = const()[name = string("op_1494"), val = tensor<int32, [3]>([1, 1, 3072])];
tensor<fp16, [1, 1, 24, 128]> var_1492_cast_fp16 = transpose(perm = var_1492_perm_0, x = attn_output_37_cast_fp16)[name = string("transpose_2")];
tensor<fp16, [1, 1, 3072]> input_89_cast_fp16 = reshape(shape = var_1494, x = var_1492_cast_fp16)[name = string("input_89_cast_fp16")];
tensor<fp16, [3072, 3072]> model_model_layers_20_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415566912))), lut = tensor<fp16, [384, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(420285568))))[name = string("model_model_layers_20_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
tensor<fp16, [1, 1, 3072]> linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_20_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_89_cast_fp16)[name = string("linear_6_cast_fp16")];
tensor<fp16, [1, 1, 3072]> hidden_states_53_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = linear_6_cast_fp16)[name = string("hidden_states_53_cast_fp16")];
tensor<int32, [1]> mean_axes_0 = const()[name = string("mean_axes_0"), val = tensor<int32, [1]>([-1])];
bool mean_keep_dims_0 = const()[name = string("mean_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 1, 1]> mean_cast_fp16 = reduce_mean(axes = mean_axes_0, keep_dims = mean_keep_dims_0, x = hidden_states_53_cast_fp16)[name = string("mean_cast_fp16")];
tensor<fp16, [1, 1, 3072]> input_91_cast_fp16 = sub(x = hidden_states_53_cast_fp16, y = mean_cast_fp16)[name = string("input_91_cast_fp16")];
tensor<int32, [1]> var_1505_axes_0 = const()[name = string("op_1505_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [3072]> model_model_layers_20_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_20_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(420297920)))];
tensor<fp16, [1, 1, 3072]> var_1505_cast_fp16 = layer_norm(axes = var_1505_axes_0, epsilon = var_47_to_fp16, gamma = model_model_layers_20_post_attention_layernorm_weight_to_fp16, x = input_91_cast_fp16)[name = string("op_1505_cast_fp16")];
tensor<int32, [3]> var_1512 = const()[name = string("op_1512"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [1]> input_93_axes_0 = const()[name = string("input_93_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 1]> var_1513 = transpose(perm = var_1512, x = var_1505_cast_fp16)[name = string("transpose_1")];
tensor<fp16, [1, 3072, 1, 1]> input_93 = expand_dims(axes = input_93_axes_0, x = var_1513)[name = string("input_93")];
string input_95_pad_type_0 = const()[name = string("input_95_pad_type_0"), val = string("valid")];
tensor<int32, [2]> input_95_strides_0 = const()[name = string("input_95_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> input_95_pad_0 = const()[name = string("input_95_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> input_95_dilations_0 = const()[name = string("input_95_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 input_95_groups_0 = const()[name = string("input_95_groups_0"), val = int32(1)];
tensor<fp16, [1, 8192, 1, 1]> input_95 = conv(dilations = input_95_dilations_0, groups = input_95_groups_0, pad = input_95_pad_0, pad_type = input_95_pad_type_0, strides = input_95_strides_0, weight = model_model_layers_20_mlp_gate_proj_weight_palettized, x = input_93)[name = string("input_95")];
string up_states_pad_type_0 = const()[name = string("up_states_pad_type_0"), val = string("valid")];
tensor<int32, [2]> up_states_strides_0 = const()[name = string("up_states_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> up_states_pad_0 = const()[name = string("up_states_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> up_states_dilations_0 = const()[name = string("up_states_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 up_states_groups_0 = const()[name = string("up_states_groups_0"), val = int32(1)];
tensor<fp16, [1, 8192, 1, 1]> up_states = conv(dilations = up_states_dilations_0, groups = up_states_groups_0, pad = up_states_pad_0, pad_type = up_states_pad_type_0, strides = up_states_strides_0, weight = model_model_layers_20_mlp_up_proj_weight_palettized, x = input_93)[name = string("up_states")];
tensor<fp16, [1, 8192, 1, 1]> gate_states = silu(x = input_95)[name = string("gate_states")];
tensor<fp16, [1, 8192, 1, 1]> input = mul(x = gate_states, y = up_states)[name = string("input")];
string hidden_states_pad_type_0 = const()[name = string("hidden_states_pad_type_0"), val = string("valid")];
tensor<int32, [2]> hidden_states_strides_0 = const()[name = string("hidden_states_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> hidden_states_pad_0 = const()[name = string("hidden_states_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> hidden_states_dilations_0 = const()[name = string("hidden_states_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 hidden_states_groups_0 = const()[name = string("hidden_states_groups_0"), val = int32(1)];
tensor<fp16, [1, 3072, 1, 1]> hidden_states_1 = conv(dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = model_model_layers_20_mlp_down_proj_weight_palettized, x = input)[name = string("hidden_states")];
tensor<int32, [1]> var_1535_axes_0 = const()[name = string("op_1535_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 1]> var_1535 = squeeze(axes = var_1535_axes_0, x = hidden_states_1)[name = string("op_1535")];
tensor<int32, [3]> var_1536 = const()[name = string("op_1536"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 1, 3072]> var_1537 = transpose(perm = var_1536, x = var_1535)[name = string("transpose_0")];
tensor<fp16, [1, 1, 3072]> output_hidden_states = add(x = hidden_states_53_cast_fp16, y = var_1537)[name = string("op_1538_cast_fp16")];
tensor<int32, [1]> position_ids_tmp = identity(x = position_ids)[name = string("position_ids_tmp")];
} -> (output_hidden_states);
func prefill<ios18>(tensor<fp16, [1, 1, 64, 1024]> causal_mask, tensor<int32, [1]> current_pos, tensor<fp16, [1, 64, 3072]> hidden_states, state<tensor<fp16, [56, 8, 1024, 128]>> model_model_kv_cache_0, tensor<int32, [64]> position_ids) {
tensor<fp16, [3072, 3072, 1, 1]> model_model_layers_14_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4718720))))[name = string("model_model_layers_14_self_attn_q_proj_weight_palettized")];
tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_14_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4731072))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6304000))))[name = string("model_model_layers_14_self_attn_k_proj_weight_palettized")];
tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_14_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(6308160))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7881088))))[name = string("model_model_layers_14_self_attn_v_proj_weight_palettized")];
tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_14_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7885248))), lut = tensor<fp16, [1024, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20468224))))[name = string("model_model_layers_14_mlp_gate_proj_weight_palettized")];
tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_14_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20501056))), lut = tensor<fp16, [1024, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33084032))))[name = string("model_model_layers_14_mlp_up_proj_weight_palettized")];
tensor<fp16, [3072, 8192, 1, 1]> model_model_layers_14_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 8192, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33116864))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45699840))))[name = string("model_model_layers_14_mlp_down_proj_weight_palettized")];
tensor<fp16, [3072, 3072, 1, 1]> model_model_layers_15_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45712192))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50430848))))[name = string("model_model_layers_15_self_attn_q_proj_weight_palettized")];
tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_15_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50443200))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52016128))))[name = string("model_model_layers_15_self_attn_k_proj_weight_palettized")];
tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_15_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52020288))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53593216))))[name = string("model_model_layers_15_self_attn_v_proj_weight_palettized")];
tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_15_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53597376))), lut = tensor<fp16, [1024, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66180352))))[name = string("model_model_layers_15_mlp_gate_proj_weight_palettized")];
tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_15_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66213184))), lut = tensor<fp16, [1024, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78796160))))[name = string("model_model_layers_15_mlp_up_proj_weight_palettized")];
tensor<fp16, [3072, 8192, 1, 1]> model_model_layers_15_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 8192, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(78828992))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91411968))))[name = string("model_model_layers_15_mlp_down_proj_weight_palettized")];
tensor<fp16, [3072, 3072, 1, 1]> model_model_layers_16_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91424320))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96142976))))[name = string("model_model_layers_16_self_attn_q_proj_weight_palettized")];
tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_16_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96155328))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97728256))))[name = string("model_model_layers_16_self_attn_k_proj_weight_palettized")];
tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_16_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97732416))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99305344))))[name = string("model_model_layers_16_self_attn_v_proj_weight_palettized")];
tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_16_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99309504))), lut = tensor<fp16, [1024, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111892480))))[name = string("model_model_layers_16_mlp_gate_proj_weight_palettized")];
tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_16_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111925312))), lut = tensor<fp16, [1024, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124508288))))[name = string("model_model_layers_16_mlp_up_proj_weight_palettized")];
tensor<fp16, [3072, 8192, 1, 1]> model_model_layers_16_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 8192, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124541120))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137124096))))[name = string("model_model_layers_16_mlp_down_proj_weight_palettized")];
tensor<fp16, [3072, 3072, 1, 1]> model_model_layers_17_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137136448))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141855104))))[name = string("model_model_layers_17_self_attn_q_proj_weight_palettized")];
tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_17_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141867456))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143440384))))[name = string("model_model_layers_17_self_attn_k_proj_weight_palettized")];
tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_17_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143444544))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145017472))))[name = string("model_model_layers_17_self_attn_v_proj_weight_palettized")];
tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_17_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145021632))), lut = tensor<fp16, [1024, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157604608))))[name = string("model_model_layers_17_mlp_gate_proj_weight_palettized")];
tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_17_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157637440))), lut = tensor<fp16, [1024, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170220416))))[name = string("model_model_layers_17_mlp_up_proj_weight_palettized")];
tensor<fp16, [3072, 8192, 1, 1]> model_model_layers_17_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 8192, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170253248))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182836224))))[name = string("model_model_layers_17_mlp_down_proj_weight_palettized")];
tensor<fp16, [3072, 3072, 1, 1]> model_model_layers_18_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182848576))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187567232))))[name = string("model_model_layers_18_self_attn_q_proj_weight_palettized")];
tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_18_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187579584))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189152512))))[name = string("model_model_layers_18_self_attn_k_proj_weight_palettized")];
tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_18_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189156672))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190729600))))[name = string("model_model_layers_18_self_attn_v_proj_weight_palettized")];
tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_18_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190733760))), lut = tensor<fp16, [1024, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203316736))))[name = string("model_model_layers_18_mlp_gate_proj_weight_palettized")];
tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_18_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203349568))), lut = tensor<fp16, [1024, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215932544))))[name = string("model_model_layers_18_mlp_up_proj_weight_palettized")];
tensor<fp16, [3072, 8192, 1, 1]> model_model_layers_18_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 8192, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215965376))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228548352))))[name = string("model_model_layers_18_mlp_down_proj_weight_palettized")];
tensor<fp16, [3072, 3072, 1, 1]> model_model_layers_19_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228560704))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233279360))))[name = string("model_model_layers_19_self_attn_q_proj_weight_palettized")];
tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_19_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233291712))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234864640))))[name = string("model_model_layers_19_self_attn_k_proj_weight_palettized")];
tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_19_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234868800))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236441728))))[name = string("model_model_layers_19_self_attn_v_proj_weight_palettized")];
tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_19_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236445888))), lut = tensor<fp16, [1024, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249028864))))[name = string("model_model_layers_19_mlp_gate_proj_weight_palettized")];
tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_19_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249061696))), lut = tensor<fp16, [1024, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261644672))))[name = string("model_model_layers_19_mlp_up_proj_weight_palettized")];
tensor<fp16, [3072, 8192, 1, 1]> model_model_layers_19_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 8192, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261677504))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(274260480))))[name = string("model_model_layers_19_mlp_down_proj_weight_palettized")];
tensor<fp16, [3072, 3072, 1, 1]> model_model_layers_20_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(274272832))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(278991488))))[name = string("model_model_layers_20_self_attn_q_proj_weight_palettized")];
tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_20_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279003840))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280576768))))[name = string("model_model_layers_20_self_attn_k_proj_weight_palettized")];
tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_20_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280580928))), lut = tensor<fp16, [128, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282153856))))[name = string("model_model_layers_20_self_attn_v_proj_weight_palettized")];
tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_20_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(282158016))), lut = tensor<fp16, [1024, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294740992))))[name = string("model_model_layers_20_mlp_gate_proj_weight_palettized")];
tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_20_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294773824))), lut = tensor<fp16, [1024, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307356800))))[name = string("model_model_layers_20_mlp_up_proj_weight_palettized")];
tensor<fp16, [3072, 8192, 1, 1]> model_model_layers_20_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 8192, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307389632))), lut = tensor<fp16, [384, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319972608))))[name = string("model_model_layers_20_mlp_down_proj_weight_palettized")];
int32 var_47 = const()[name = string("op_47"), val = int32(-1)];
int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)];
tensor<bool, [64]> greater_equal_0 = greater_equal(x = position_ids, y = greater_equal_0_y_0)[name = string("greater_equal_0")];
int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(131072)];
tensor<int32, [64]> add_0 = add(x = position_ids, y = slice_by_index_0)[name = string("add_0")];
tensor<int32, [64]> select_0 = select(a = position_ids, b = add_0, cond = greater_equal_0)[name = string("select_0")];
int32 var_239_axis_0 = const()[name = string("op_239_axis_0"), val = int32(1)];
int32 var_239_batch_dims_0 = const()[name = string("op_239_batch_dims_0"), val = int32(0)];
bool var_239_validate_indices_0 = const()[name = string("op_239_validate_indices_0"), val = bool(false)];
tensor<fp16, [1, 131072, 128]> var_58_to_fp16 = const()[name = string("op_58_to_fp16"), val = tensor<fp16, [1, 131072, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353539456)))];
tensor<fp16, [1, 64, 128]> var_239_cast_fp16 = gather(axis = var_239_axis_0, batch_dims = var_239_batch_dims_0, indices = select_0, validate_indices = var_239_validate_indices_0, x = var_58_to_fp16)[name = string("op_239_cast_fp16")];
tensor<int32, [4]> var_240 = const()[name = string("op_240"), val = tensor<int32, [4]>([1, 64, 1, 128])];
tensor<fp16, [1, 64, 1, 128]> cos_1_cast_fp16 = reshape(shape = var_240, x = var_239_cast_fp16)[name = string("cos_1_cast_fp16")];
int32 var_244_axis_0 = const()[name = string("op_244_axis_0"), val = int32(1)];
int32 var_244_batch_dims_0 = const()[name = string("op_244_batch_dims_0"), val = int32(0)];
bool var_244_validate_indices_0 = const()[name = string("op_244_validate_indices_0"), val = bool(false)];
tensor<fp16, [1, 131072, 128]> var_53_to_fp16 = const()[name = string("op_53_to_fp16"), val = tensor<fp16, [1, 131072, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319984960)))];
tensor<fp16, [1, 64, 128]> var_244_cast_fp16 = gather(axis = var_244_axis_0, batch_dims = var_244_batch_dims_0, indices = select_0, validate_indices = var_244_validate_indices_0, x = var_53_to_fp16)[name = string("op_244_cast_fp16")];
tensor<int32, [4]> var_245 = const()[name = string("op_245"), val = tensor<int32, [4]>([1, 64, 1, 128])];
tensor<fp16, [1, 64, 1, 128]> sin_1_cast_fp16 = reshape(shape = var_245, x = var_244_cast_fp16)[name = string("sin_1_cast_fp16")];
tensor<int32, [1]> mean_1_axes_0 = const()[name = string("mean_1_axes_0"), val = tensor<int32, [1]>([-1])];
bool mean_1_keep_dims_0 = const()[name = string("mean_1_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 64, 1]> mean_1_cast_fp16 = reduce_mean(axes = mean_1_axes_0, keep_dims = mean_1_keep_dims_0, x = hidden_states)[name = string("mean_1_cast_fp16")];
tensor<fp16, [1, 64, 3072]> input_1_cast_fp16 = sub(x = hidden_states, y = mean_1_cast_fp16)[name = string("input_1_cast_fp16")];
tensor<int32, [1]> var_255_axes_0 = const()[name = string("op_255_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [3072]> model_model_layers_14_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_input_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387093952)))];
fp16 var_49_to_fp16 = const()[name = string("op_49_to_fp16"), val = fp16(0x1.5p-17)];
tensor<fp16, [1, 64, 3072]> var_255_cast_fp16 = layer_norm(axes = var_255_axes_0, epsilon = var_49_to_fp16, gamma = model_model_layers_14_input_layernorm_weight_to_fp16, x = input_1_cast_fp16)[name = string("op_255_cast_fp16")];
tensor<int32, [3]> var_259 = const()[name = string("op_259"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [1]> var_261_axes_0 = const()[name = string("op_261_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 64]> var_260 = transpose(perm = var_259, x = var_255_cast_fp16)[name = string("transpose_50")];
tensor<fp16, [1, 3072, 1, 64]> var_261 = expand_dims(axes = var_261_axes_0, x = var_260)[name = string("op_261")];
string query_states_1_pad_type_0 = const()[name = string("query_states_1_pad_type_0"), val = string("valid")];
tensor<int32, [2]> query_states_1_strides_0 = const()[name = string("query_states_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> query_states_1_pad_0 = const()[name = string("query_states_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> query_states_1_dilations_0 = const()[name = string("query_states_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 query_states_1_groups_0 = const()[name = string("query_states_1_groups_0"), val = int32(1)];
tensor<fp16, [1, 3072, 1, 64]> query_states_1 = conv(dilations = query_states_1_dilations_0, groups = query_states_1_groups_0, pad = query_states_1_pad_0, pad_type = query_states_1_pad_type_0, strides = query_states_1_strides_0, weight = model_model_layers_14_self_attn_q_proj_weight_palettized, x = var_261)[name = string("query_states_1")];
string key_states_1_pad_type_0 = const()[name = string("key_states_1_pad_type_0"), val = string("valid")];
tensor<int32, [2]> key_states_1_strides_0 = const()[name = string("key_states_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> key_states_1_pad_0 = const()[name = string("key_states_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> key_states_1_dilations_0 = const()[name = string("key_states_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 key_states_1_groups_0 = const()[name = string("key_states_1_groups_0"), val = int32(1)];
tensor<fp16, [1, 1024, 1, 64]> key_states_1 = conv(dilations = key_states_1_dilations_0, groups = key_states_1_groups_0, pad = key_states_1_pad_0, pad_type = key_states_1_pad_type_0, strides = key_states_1_strides_0, weight = model_model_layers_14_self_attn_k_proj_weight_palettized, x = var_261)[name = string("key_states_1")];
string value_states_1_pad_type_0 = const()[name = string("value_states_1_pad_type_0"), val = string("valid")];
tensor<int32, [2]> value_states_1_strides_0 = const()[name = string("value_states_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> value_states_1_pad_0 = const()[name = string("value_states_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> value_states_1_dilations_0 = const()[name = string("value_states_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 value_states_1_groups_0 = const()[name = string("value_states_1_groups_0"), val = int32(1)];
tensor<fp16, [1, 1024, 1, 64]> value_states_1 = conv(dilations = value_states_1_dilations_0, groups = value_states_1_groups_0, pad = value_states_1_pad_0, pad_type = value_states_1_pad_type_0, strides = value_states_1_strides_0, weight = model_model_layers_14_self_attn_v_proj_weight_palettized, x = var_261)[name = string("value_states_1")];
tensor<int32, [4]> var_281 = const()[name = string("op_281"), val = tensor<int32, [4]>([1, 24, 128, 64])];
tensor<fp16, [1, 24, 128, 64]> var_282 = reshape(shape = var_281, x = query_states_1)[name = string("op_282")];
tensor<int32, [4]> var_283 = const()[name = string("op_283"), val = tensor<int32, [4]>([0, 1, 3, 2])];
tensor<int32, [4]> var_285 = const()[name = string("op_285"), val = tensor<int32, [4]>([1, 8, 128, 64])];
tensor<fp16, [1, 8, 128, 64]> var_286 = reshape(shape = var_285, x = key_states_1)[name = string("op_286")];
tensor<int32, [4]> var_287 = const()[name = string("op_287"), val = tensor<int32, [4]>([0, 1, 3, 2])];
tensor<int32, [4]> var_289 = const()[name = string("op_289"), val = tensor<int32, [4]>([1, 8, 128, 64])];
tensor<fp16, [1, 8, 128, 64]> var_290 = reshape(shape = var_289, x = value_states_1)[name = string("op_290")];
tensor<int32, [4]> var_291 = const()[name = string("op_291"), val = tensor<int32, [4]>([0, 1, 3, 2])];
tensor<int32, [4]> var_293 = const()[name = string("op_293"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [4]> var_295 = const()[name = string("op_295"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [4]> x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor<int32, [4]>([1, 24, 64, 64])];
tensor<bool, [4]> x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 24, 64, 128]> x_1 = transpose(perm = var_283, x = var_282)[name = string("transpose_49")];
tensor<fp16, [1, 24, 64, 64]> x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = x_1)[name = string("x1_1")];
tensor<int32, [4]> x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor<int32, [4]>([1, 24, 64, 128])];
tensor<bool, [4]> x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 24, 64, 64]> x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = x_1)[name = string("x2_1")];
tensor<int32, [4]> cos_7_begin_0 = const()[name = string("cos_7_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> cos_7_end_0 = const()[name = string("cos_7_end_0"), val = tensor<int32, [4]>([1, 1, 64, 64])];
tensor<bool, [4]> cos_7_end_mask_0 = const()[name = string("cos_7_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 1, 64, 128]> cos_5 = transpose(perm = var_293, x = cos_1_cast_fp16)[name = string("transpose_48")];
tensor<fp16, [1, 1, 64, 64]> cos_7 = slice_by_index(begin = cos_7_begin_0, end = cos_7_end_0, end_mask = cos_7_end_mask_0, x = cos_5)[name = string("cos_7")];
tensor<int32, [4]> sin_7_begin_0 = const()[name = string("sin_7_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> sin_7_end_0 = const()[name = string("sin_7_end_0"), val = tensor<int32, [4]>([1, 1, 64, 64])];
tensor<bool, [4]> sin_7_end_mask_0 = const()[name = string("sin_7_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 1, 64, 128]> sin_5 = transpose(perm = var_295, x = sin_1_cast_fp16)[name = string("transpose_47")];
tensor<fp16, [1, 1, 64, 64]> sin_7 = slice_by_index(begin = sin_7_begin_0, end = sin_7_end_0, end_mask = sin_7_end_mask_0, x = sin_5)[name = string("sin_7")];
tensor<fp16, [1, 24, 64, 64]> var_309 = mul(x = x1_1, y = cos_7)[name = string("op_309")];
tensor<fp16, [1, 24, 64, 64]> var_310 = mul(x = x2_1, y = sin_7)[name = string("op_310")];
tensor<fp16, [1, 24, 64, 64]> var_311 = sub(x = var_309, y = var_310)[name = string("op_311")];
tensor<fp16, [1, 24, 64, 64]> var_312 = mul(x = x2_1, y = cos_7)[name = string("op_312")];
tensor<fp16, [1, 24, 64, 64]> var_313 = mul(x = x1_1, y = sin_7)[name = string("op_313")];
tensor<fp16, [1, 24, 64, 64]> var_314 = add(x = var_312, y = var_313)[name = string("op_314")];
bool rotated_1_interleave_0 = const()[name = string("rotated_1_interleave_0"), val = bool(false)];
tensor<fp16, [1, 24, 64, 128]> rotated_1 = concat(axis = var_47, interleave = rotated_1_interleave_0, values = (var_311, var_314))[name = string("rotated_1")];
tensor<int32, [4]> x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor<int32, [4]>([1, 8, 64, 64])];
tensor<bool, [4]> x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 8, 64, 128]> x_5 = transpose(perm = var_287, x = var_286)[name = string("transpose_46")];
tensor<fp16, [1, 8, 64, 64]> x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = x_5)[name = string("x1_3")];
tensor<int32, [4]> x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor<int32, [4]>([1, 8, 64, 128])];
tensor<bool, [4]> x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 8, 64, 64]> x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = x_5)[name = string("x2_3")];
tensor<fp16, [1, 8, 64, 64]> var_330 = mul(x = x1_3, y = cos_7)[name = string("op_330")];
tensor<fp16, [1, 8, 64, 64]> var_331 = mul(x = x2_3, y = sin_7)[name = string("op_331")];
tensor<fp16, [1, 8, 64, 64]> var_332 = sub(x = var_330, y = var_331)[name = string("op_332")];
tensor<fp16, [1, 8, 64, 64]> var_333 = mul(x = x2_3, y = cos_7)[name = string("op_333")];
tensor<fp16, [1, 8, 64, 64]> var_334 = mul(x = x1_3, y = sin_7)[name = string("op_334")];
tensor<fp16, [1, 8, 64, 64]> var_335 = add(x = var_333, y = var_334)[name = string("op_335")];
bool rotated_3_interleave_0 = const()[name = string("rotated_3_interleave_0"), val = bool(false)];
tensor<fp16, [1, 8, 64, 128]> rotated_3 = concat(axis = var_47, interleave = rotated_3_interleave_0, values = (var_332, var_335))[name = string("rotated_3")];
tensor<int32, [1]> seq_length_1 = const()[name = string("seq_length_1"), val = tensor<int32, [1]>([64])];
tensor<int32, [1]> var_344 = add(x = current_pos, y = seq_length_1)[name = string("op_344")];
tensor<fp16, [56, 8, 1024, 128]> read_state_0 = read_state(input = model_model_kv_cache_0)[name = string("read_state_0")];
tensor<int32, [1]> expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor<int32, [1]>([14])];
tensor<int32, [1]> expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor<int32, [1]>([15])];
int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)];
bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")];
tensor<int32, [1]> concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor<int32, [1]>([0])];
int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)];
bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_344, concat_3_values3_0))[name = string("concat_3")];
tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_1_stride_0, update = rotated_3, x = read_state_0)[name = string("model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16")];
write_state(data = model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_14_write_state")];
tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_14 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_14")];
tensor<int32, [1]> expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor<int32, [1]>([42])];
tensor<int32, [1]> expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor<int32, [1]>([43])];
int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)];
bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")];
tensor<int32, [1]> concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor<int32, [1]>([0])];
int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)];
bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_344, concat_7_values3_0))[name = string("concat_7")];
tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<fp16, [1, 8, 64, 128]> value_states_3 = transpose(perm = var_291, x = var_290)[name = string("transpose_45")];
tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_2_stride_0, update = value_states_3, x = coreml_update_state_14)[name = string("model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16")];
write_state(data = model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_15_write_state")];
tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_15 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_15")];
tensor<int32, [4]> var_358_begin_0 = const()[name = string("op_358_begin_0"), val = tensor<int32, [4]>([14, 0, 0, 0])];
tensor<int32, [4]> var_358_end_0 = const()[name = string("op_358_end_0"), val = tensor<int32, [4]>([15, 8, 1024, 128])];
tensor<bool, [4]> var_358_end_mask_0 = const()[name = string("op_358_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<fp16, [1, 8, 1024, 128]> var_358_cast_fp16 = slice_by_index(begin = var_358_begin_0, end = var_358_end_0, end_mask = var_358_end_mask_0, x = coreml_update_state_15)[name = string("op_358_cast_fp16")];
tensor<int32, [1]> K_layer_cache_1_axes_0 = const()[name = string("K_layer_cache_1_axes_0"), val = tensor<int32, [1]>([0])];
tensor<fp16, [8, 1024, 128]> K_layer_cache_1_cast_fp16 = squeeze(axes = K_layer_cache_1_axes_0, x = var_358_cast_fp16)[name = string("K_layer_cache_1_cast_fp16")];
tensor<int32, [4]> var_360_begin_0 = const()[name = string("op_360_begin_0"), val = tensor<int32, [4]>([42, 0, 0, 0])];
tensor<int32, [4]> var_360_end_0 = const()[name = string("op_360_end_0"), val = tensor<int32, [4]>([43, 8, 1024, 128])];
tensor<bool, [4]> var_360_end_mask_0 = const()[name = string("op_360_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<fp16, [1, 8, 1024, 128]> var_360_cast_fp16 = slice_by_index(begin = var_360_begin_0, end = var_360_end_0, end_mask = var_360_end_mask_0, x = coreml_update_state_15)[name = string("op_360_cast_fp16")];
tensor<int32, [1]> V_layer_cache_1_axes_0 = const()[name = string("V_layer_cache_1_axes_0"), val = tensor<int32, [1]>([0])];
tensor<fp16, [8, 1024, 128]> V_layer_cache_1_cast_fp16 = squeeze(axes = V_layer_cache_1_axes_0, x = var_360_cast_fp16)[name = string("V_layer_cache_1_cast_fp16")];
tensor<int32, [1]> x_11_axes_0 = const()[name = string("x_11_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [8, 1, 1024, 128]> x_11_cast_fp16 = expand_dims(axes = x_11_axes_0, x = K_layer_cache_1_cast_fp16)[name = string("x_11_cast_fp16")];
tensor<int32, [4]> var_369 = const()[name = string("op_369"), val = tensor<int32, [4]>([1, 3, 1, 1])];
tensor<fp16, [8, 3, 1024, 128]> x_13_cast_fp16 = tile(reps = var_369, x = x_11_cast_fp16)[name = string("x_13_cast_fp16")];
tensor<int32, [4]> var_373 = const()[name = string("op_373"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
tensor<fp16, [1, 24, 1024, 128]> var_374_cast_fp16 = reshape(shape = var_373, x = x_13_cast_fp16)[name = string("op_374_cast_fp16")];
tensor<int32, [1]> x_17_axes_0 = const()[name = string("x_17_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [8, 1, 1024, 128]> x_17_cast_fp16 = expand_dims(axes = x_17_axes_0, x = V_layer_cache_1_cast_fp16)[name = string("x_17_cast_fp16")];
tensor<int32, [4]> var_376 = const()[name = string("op_376"), val = tensor<int32, [4]>([1, 3, 1, 1])];
tensor<fp16, [8, 3, 1024, 128]> x_19_cast_fp16 = tile(reps = var_376, x = x_17_cast_fp16)[name = string("x_19_cast_fp16")];
bool var_383_transpose_x_0 = const()[name = string("op_383_transpose_x_0"), val = bool(false)];
bool var_383_transpose_y_0 = const()[name = string("op_383_transpose_y_0"), val = bool(true)];
tensor<fp16, [1, 24, 64, 1024]> var_383_cast_fp16 = matmul(transpose_x = var_383_transpose_x_0, transpose_y = var_383_transpose_y_0, x = rotated_1, y = var_374_cast_fp16)[name = string("op_383_cast_fp16")];
fp16 var_384_to_fp16 = const()[name = string("op_384_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 24, 64, 1024]> attn_weights_1_cast_fp16 = mul(x = var_383_cast_fp16, y = var_384_to_fp16)[name = string("attn_weights_1_cast_fp16")];
tensor<fp16, [1, 24, 64, 1024]> x_21_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("x_21_cast_fp16")];
tensor<int32, [1]> reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor<int32, [1]>([-1])];
bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 24, 64, 1]> reduce_max_0_cast_fp16 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_21_cast_fp16)[name = string("reduce_max_0_cast_fp16")];
tensor<fp16, [1, 24, 64, 1024]> x_23_cast_fp16 = sub(x = x_21_cast_fp16, y = reduce_max_0_cast_fp16)[name = string("x_23_cast_fp16")];
tensor<fp16, [1, 24, 64, 1024]> exp_x_1_cast_fp16 = exp(x = x_23_cast_fp16)[name = string("exp_x_1_cast_fp16")];
tensor<int32, [1]> var_395_axes_0 = const()[name = string("op_395_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_395_keep_dims_0 = const()[name = string("op_395_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 24, 64, 1]> var_395_cast_fp16 = reduce_sum(axes = var_395_axes_0, keep_dims = var_395_keep_dims_0, x = exp_x_1_cast_fp16)[name = string("op_395_cast_fp16")];
tensor<fp16, [1, 24, 64, 1024]> var_396_cast_fp16 = real_div(x = exp_x_1_cast_fp16, y = var_395_cast_fp16)[name = string("op_396_cast_fp16")];
tensor<int32, [3]> concat_12 = const()[name = string("concat_12"), val = tensor<int32, [3]>([24, 64, 1024])];
tensor<fp16, [24, 64, 1024]> reshape_0_cast_fp16 = reshape(shape = concat_12, x = var_396_cast_fp16)[name = string("reshape_0_cast_fp16")];
tensor<int32, [3]> concat_13 = const()[name = string("concat_13"), val = tensor<int32, [3]>([24, 1024, 128])];
tensor<fp16, [24, 1024, 128]> reshape_1_cast_fp16 = reshape(shape = concat_13, x = x_19_cast_fp16)[name = string("reshape_1_cast_fp16")];
bool matmul_0_transpose_x_0 = const()[name = string("matmul_0_transpose_x_0"), val = bool(false)];
bool matmul_0_transpose_y_0 = const()[name = string("matmul_0_transpose_y_0"), val = bool(false)];
tensor<fp16, [24, 64, 128]> matmul_0_cast_fp16 = matmul(transpose_x = matmul_0_transpose_x_0, transpose_y = matmul_0_transpose_y_0, x = reshape_0_cast_fp16, y = reshape_1_cast_fp16)[name = string("matmul_0_cast_fp16")];
tensor<int32, [4]> concat_17 = const()[name = string("concat_17"), val = tensor<int32, [4]>([1, 24, 64, 128])];
tensor<fp16, [1, 24, 64, 128]> reshape_2_cast_fp16 = reshape(shape = concat_17, x = matmul_0_cast_fp16)[name = string("reshape_2_cast_fp16")];
tensor<int32, [4]> var_399_perm_0 = const()[name = string("op_399_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_401 = const()[name = string("op_401"), val = tensor<int32, [3]>([1, 64, 3072])];
tensor<fp16, [1, 64, 24, 128]> var_399_cast_fp16 = transpose(perm = var_399_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_44")];
tensor<fp16, [1, 64, 3072]> input_5_cast_fp16 = reshape(shape = var_401, x = var_399_cast_fp16)[name = string("input_5_cast_fp16")];
tensor<fp16, [3072, 3072]> model_model_layers_14_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387100160))), lut = tensor<fp16, [384, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391818816))))[name = string("model_model_layers_14_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
tensor<fp16, [3072]> linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391831168)))];
tensor<fp16, [1, 64, 3072]> linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_14_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_5_cast_fp16)[name = string("linear_0_cast_fp16")];
tensor<fp16, [1, 64, 3072]> hidden_states_5_cast_fp16 = add(x = hidden_states, y = linear_0_cast_fp16)[name = string("hidden_states_5_cast_fp16")];
tensor<int32, [1]> mean_3_axes_0 = const()[name = string("mean_3_axes_0"), val = tensor<int32, [1]>([-1])];
bool mean_3_keep_dims_0 = const()[name = string("mean_3_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 64, 1]> mean_3_cast_fp16 = reduce_mean(axes = mean_3_axes_0, keep_dims = mean_3_keep_dims_0, x = hidden_states_5_cast_fp16)[name = string("mean_3_cast_fp16")];
tensor<fp16, [1, 64, 3072]> input_7_cast_fp16 = sub(x = hidden_states_5_cast_fp16, y = mean_3_cast_fp16)[name = string("input_7_cast_fp16")];
tensor<int32, [1]> var_412_axes_0 = const()[name = string("op_412_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [3072]> model_model_layers_14_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391837376)))];
tensor<fp16, [1, 64, 3072]> var_412_cast_fp16 = layer_norm(axes = var_412_axes_0, epsilon = var_49_to_fp16, gamma = model_model_layers_14_post_attention_layernorm_weight_to_fp16, x = input_7_cast_fp16)[name = string("op_412_cast_fp16")];
tensor<int32, [3]> var_419 = const()[name = string("op_419"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [1]> input_9_axes_0 = const()[name = string("input_9_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 64]> var_420 = transpose(perm = var_419, x = var_412_cast_fp16)[name = string("transpose_43")];
tensor<fp16, [1, 3072, 1, 64]> input_9 = expand_dims(axes = input_9_axes_0, x = var_420)[name = string("input_9")];
string input_11_pad_type_0 = const()[name = string("input_11_pad_type_0"), val = string("valid")];
tensor<int32, [2]> input_11_strides_0 = const()[name = string("input_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> input_11_pad_0 = const()[name = string("input_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> input_11_dilations_0 = const()[name = string("input_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 input_11_groups_0 = const()[name = string("input_11_groups_0"), val = int32(1)];
tensor<fp16, [1, 8192, 1, 64]> input_11 = conv(dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = model_model_layers_14_mlp_gate_proj_weight_palettized, x = input_9)[name = string("input_11")];
string up_states_1_pad_type_0 = const()[name = string("up_states_1_pad_type_0"), val = string("valid")];
tensor<int32, [2]> up_states_1_strides_0 = const()[name = string("up_states_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> up_states_1_pad_0 = const()[name = string("up_states_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> up_states_1_dilations_0 = const()[name = string("up_states_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 up_states_1_groups_0 = const()[name = string("up_states_1_groups_0"), val = int32(1)];
tensor<fp16, [1, 8192, 1, 64]> up_states_1 = conv(dilations = up_states_1_dilations_0, groups = up_states_1_groups_0, pad = up_states_1_pad_0, pad_type = up_states_1_pad_type_0, strides = up_states_1_strides_0, weight = model_model_layers_14_mlp_up_proj_weight_palettized, x = input_9)[name = string("up_states_1")];
tensor<fp16, [1, 8192, 1, 64]> gate_states_1 = silu(x = input_11)[name = string("gate_states_1")];
tensor<fp16, [1, 8192, 1, 64]> input_13 = mul(x = gate_states_1, y = up_states_1)[name = string("input_13")];
string hidden_states_7_pad_type_0 = const()[name = string("hidden_states_7_pad_type_0"), val = string("valid")];
tensor<int32, [2]> hidden_states_7_strides_0 = const()[name = string("hidden_states_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> hidden_states_7_pad_0 = const()[name = string("hidden_states_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> hidden_states_7_dilations_0 = const()[name = string("hidden_states_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 hidden_states_7_groups_0 = const()[name = string("hidden_states_7_groups_0"), val = int32(1)];
tensor<fp16, [1, 3072, 1, 64]> hidden_states_7 = conv(dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = model_model_layers_14_mlp_down_proj_weight_palettized, x = input_13)[name = string("hidden_states_7")];
tensor<int32, [1]> var_442_axes_0 = const()[name = string("op_442_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 64]> var_442 = squeeze(axes = var_442_axes_0, x = hidden_states_7)[name = string("op_442")];
tensor<int32, [3]> var_443 = const()[name = string("op_443"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 64, 3072]> var_444 = transpose(perm = var_443, x = var_442)[name = string("transpose_42")];
tensor<fp16, [1, 64, 3072]> hidden_states_9_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = var_444)[name = string("hidden_states_9_cast_fp16")];
tensor<int32, [1]> mean_5_axes_0 = const()[name = string("mean_5_axes_0"), val = tensor<int32, [1]>([-1])];
bool mean_5_keep_dims_0 = const()[name = string("mean_5_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 64, 1]> mean_5_cast_fp16 = reduce_mean(axes = mean_5_axes_0, keep_dims = mean_5_keep_dims_0, x = hidden_states_9_cast_fp16)[name = string("mean_5_cast_fp16")];
tensor<fp16, [1, 64, 3072]> input_15_cast_fp16 = sub(x = hidden_states_9_cast_fp16, y = mean_5_cast_fp16)[name = string("input_15_cast_fp16")];
tensor<int32, [1]> var_452_axes_0 = const()[name = string("op_452_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [3072]> model_model_layers_15_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_input_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391843584)))];
tensor<fp16, [1, 64, 3072]> var_452_cast_fp16 = layer_norm(axes = var_452_axes_0, epsilon = var_49_to_fp16, gamma = model_model_layers_15_input_layernorm_weight_to_fp16, x = input_15_cast_fp16)[name = string("op_452_cast_fp16")];
tensor<int32, [3]> var_456 = const()[name = string("op_456"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [1]> var_458_axes_0 = const()[name = string("op_458_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 64]> var_457 = transpose(perm = var_456, x = var_452_cast_fp16)[name = string("transpose_41")];
tensor<fp16, [1, 3072, 1, 64]> var_458 = expand_dims(axes = var_458_axes_0, x = var_457)[name = string("op_458")];
string query_states_5_pad_type_0 = const()[name = string("query_states_5_pad_type_0"), val = string("valid")];
tensor<int32, [2]> query_states_5_strides_0 = const()[name = string("query_states_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> query_states_5_pad_0 = const()[name = string("query_states_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> query_states_5_dilations_0 = const()[name = string("query_states_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 query_states_5_groups_0 = const()[name = string("query_states_5_groups_0"), val = int32(1)];
tensor<fp16, [1, 3072, 1, 64]> query_states_5 = conv(dilations = query_states_5_dilations_0, groups = query_states_5_groups_0, pad = query_states_5_pad_0, pad_type = query_states_5_pad_type_0, strides = query_states_5_strides_0, weight = model_model_layers_15_self_attn_q_proj_weight_palettized, x = var_458)[name = string("query_states_5")];
string key_states_7_pad_type_0 = const()[name = string("key_states_7_pad_type_0"), val = string("valid")];
tensor<int32, [2]> key_states_7_strides_0 = const()[name = string("key_states_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> key_states_7_pad_0 = const()[name = string("key_states_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> key_states_7_dilations_0 = const()[name = string("key_states_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 key_states_7_groups_0 = const()[name = string("key_states_7_groups_0"), val = int32(1)];
tensor<fp16, [1, 1024, 1, 64]> key_states_7 = conv(dilations = key_states_7_dilations_0, groups = key_states_7_groups_0, pad = key_states_7_pad_0, pad_type = key_states_7_pad_type_0, strides = key_states_7_strides_0, weight = model_model_layers_15_self_attn_k_proj_weight_palettized, x = var_458)[name = string("key_states_7")];
string value_states_7_pad_type_0 = const()[name = string("value_states_7_pad_type_0"), val = string("valid")];
tensor<int32, [2]> value_states_7_strides_0 = const()[name = string("value_states_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> value_states_7_pad_0 = const()[name = string("value_states_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> value_states_7_dilations_0 = const()[name = string("value_states_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 value_states_7_groups_0 = const()[name = string("value_states_7_groups_0"), val = int32(1)];
tensor<fp16, [1, 1024, 1, 64]> value_states_7 = conv(dilations = value_states_7_dilations_0, groups = value_states_7_groups_0, pad = value_states_7_pad_0, pad_type = value_states_7_pad_type_0, strides = value_states_7_strides_0, weight = model_model_layers_15_self_attn_v_proj_weight_palettized, x = var_458)[name = string("value_states_7")];
tensor<int32, [4]> var_478 = const()[name = string("op_478"), val = tensor<int32, [4]>([1, 24, 128, 64])];
tensor<fp16, [1, 24, 128, 64]> var_479 = reshape(shape = var_478, x = query_states_5)[name = string("op_479")];
tensor<int32, [4]> var_480 = const()[name = string("op_480"), val = tensor<int32, [4]>([0, 1, 3, 2])];
tensor<int32, [4]> var_482 = const()[name = string("op_482"), val = tensor<int32, [4]>([1, 8, 128, 64])];
tensor<fp16, [1, 8, 128, 64]> var_483 = reshape(shape = var_482, x = key_states_7)[name = string("op_483")];
tensor<int32, [4]> var_484 = const()[name = string("op_484"), val = tensor<int32, [4]>([0, 1, 3, 2])];
tensor<int32, [4]> var_486 = const()[name = string("op_486"), val = tensor<int32, [4]>([1, 8, 128, 64])];
tensor<fp16, [1, 8, 128, 64]> var_487 = reshape(shape = var_486, x = value_states_7)[name = string("op_487")];
tensor<int32, [4]> var_488 = const()[name = string("op_488"), val = tensor<int32, [4]>([0, 1, 3, 2])];
tensor<int32, [4]> x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor<int32, [4]>([1, 24, 64, 64])];
tensor<bool, [4]> x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 24, 64, 128]> x_29 = transpose(perm = var_480, x = var_479)[name = string("transpose_40")];
tensor<fp16, [1, 24, 64, 64]> x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = x_29)[name = string("x1_5")];
tensor<int32, [4]> x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor<int32, [4]>([1, 24, 64, 128])];
tensor<bool, [4]> x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 24, 64, 64]> x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = x_29)[name = string("x2_5")];
tensor<fp16, [1, 24, 64, 64]> var_506 = mul(x = x1_5, y = cos_7)[name = string("op_506")];
tensor<fp16, [1, 24, 64, 64]> var_507 = mul(x = x2_5, y = sin_7)[name = string("op_507")];
tensor<fp16, [1, 24, 64, 64]> var_508 = sub(x = var_506, y = var_507)[name = string("op_508")];
tensor<fp16, [1, 24, 64, 64]> var_509 = mul(x = x2_5, y = cos_7)[name = string("op_509")];
tensor<fp16, [1, 24, 64, 64]> var_510 = mul(x = x1_5, y = sin_7)[name = string("op_510")];
tensor<fp16, [1, 24, 64, 64]> var_511 = add(x = var_509, y = var_510)[name = string("op_511")];
bool rotated_5_interleave_0 = const()[name = string("rotated_5_interleave_0"), val = bool(false)];
tensor<fp16, [1, 24, 64, 128]> rotated_5 = concat(axis = var_47, interleave = rotated_5_interleave_0, values = (var_508, var_511))[name = string("rotated_5")];
tensor<int32, [4]> x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor<int32, [4]>([1, 8, 64, 64])];
tensor<bool, [4]> x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 8, 64, 128]> x_33 = transpose(perm = var_484, x = var_483)[name = string("transpose_39")];
tensor<fp16, [1, 8, 64, 64]> x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = x_33)[name = string("x1_7")];
tensor<int32, [4]> x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor<int32, [4]>([1, 8, 64, 128])];
tensor<bool, [4]> x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 8, 64, 64]> x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = x_33)[name = string("x2_7")];
tensor<fp16, [1, 8, 64, 64]> var_527 = mul(x = x1_7, y = cos_7)[name = string("op_527")];
tensor<fp16, [1, 8, 64, 64]> var_528 = mul(x = x2_7, y = sin_7)[name = string("op_528")];
tensor<fp16, [1, 8, 64, 64]> var_529 = sub(x = var_527, y = var_528)[name = string("op_529")];
tensor<fp16, [1, 8, 64, 64]> var_530 = mul(x = x2_7, y = cos_7)[name = string("op_530")];
tensor<fp16, [1, 8, 64, 64]> var_531 = mul(x = x1_7, y = sin_7)[name = string("op_531")];
tensor<fp16, [1, 8, 64, 64]> var_532 = add(x = var_530, y = var_531)[name = string("op_532")];
bool rotated_7_interleave_0 = const()[name = string("rotated_7_interleave_0"), val = bool(false)];
tensor<fp16, [1, 8, 64, 128]> rotated_7 = concat(axis = var_47, interleave = rotated_7_interleave_0, values = (var_529, var_532))[name = string("rotated_7")];
tensor<int32, [1]> expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor<int32, [1]>([15])];
tensor<int32, [1]> expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor<int32, [1]>([16])];
int32 concat_20_axis_0 = const()[name = string("concat_20_axis_0"), val = int32(0)];
bool concat_20_interleave_0 = const()[name = string("concat_20_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_20 = concat(axis = concat_20_axis_0, interleave = concat_20_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_20")];
tensor<int32, [1]> concat_21_values1_0 = const()[name = string("concat_21_values1_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_21_values3_0 = const()[name = string("concat_21_values3_0"), val = tensor<int32, [1]>([0])];
int32 concat_21_axis_0 = const()[name = string("concat_21_axis_0"), val = int32(0)];
bool concat_21_interleave_0 = const()[name = string("concat_21_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_21 = concat(axis = concat_21_axis_0, interleave = concat_21_interleave_0, values = (expand_dims_16, concat_21_values1_0, var_344, concat_21_values3_0))[name = string("concat_21")];
tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_20, begin_mask = model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_21, end_mask = model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_3_stride_0, update = rotated_7, x = coreml_update_state_15)[name = string("model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16")];
write_state(data = model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_16_write_state")];
tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_16 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_16")];
tensor<int32, [1]> expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor<int32, [1]>([43])];
tensor<int32, [1]> expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor<int32, [1]>([44])];
int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)];
bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_24")];
tensor<int32, [1]> concat_25_values1_0 = const()[name = string("concat_25_values1_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_25_values3_0 = const()[name = string("concat_25_values3_0"), val = tensor<int32, [1]>([0])];
int32 concat_25_axis_0 = const()[name = string("concat_25_axis_0"), val = int32(0)];
bool concat_25_interleave_0 = const()[name = string("concat_25_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_25 = concat(axis = concat_25_axis_0, interleave = concat_25_interleave_0, values = (expand_dims_22, concat_25_values1_0, var_344, concat_25_values3_0))[name = string("concat_25")];
tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<fp16, [1, 8, 64, 128]> value_states_9 = transpose(perm = var_488, x = var_487)[name = string("transpose_38")];
tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_24, begin_mask = model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_25, end_mask = model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_4_stride_0, update = value_states_9, x = coreml_update_state_16)[name = string("model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16")];
write_state(data = model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_17_write_state")];
tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_17 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_17")];
tensor<int32, [4]> var_555_begin_0 = const()[name = string("op_555_begin_0"), val = tensor<int32, [4]>([15, 0, 0, 0])];
tensor<int32, [4]> var_555_end_0 = const()[name = string("op_555_end_0"), val = tensor<int32, [4]>([16, 8, 1024, 128])];
tensor<bool, [4]> var_555_end_mask_0 = const()[name = string("op_555_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<fp16, [1, 8, 1024, 128]> var_555_cast_fp16 = slice_by_index(begin = var_555_begin_0, end = var_555_end_0, end_mask = var_555_end_mask_0, x = coreml_update_state_17)[name = string("op_555_cast_fp16")];
tensor<int32, [1]> K_layer_cache_3_axes_0 = const()[name = string("K_layer_cache_3_axes_0"), val = tensor<int32, [1]>([0])];
tensor<fp16, [8, 1024, 128]> K_layer_cache_3_cast_fp16 = squeeze(axes = K_layer_cache_3_axes_0, x = var_555_cast_fp16)[name = string("K_layer_cache_3_cast_fp16")];
tensor<int32, [4]> var_557_begin_0 = const()[name = string("op_557_begin_0"), val = tensor<int32, [4]>([43, 0, 0, 0])];
tensor<int32, [4]> var_557_end_0 = const()[name = string("op_557_end_0"), val = tensor<int32, [4]>([44, 8, 1024, 128])];
tensor<bool, [4]> var_557_end_mask_0 = const()[name = string("op_557_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<fp16, [1, 8, 1024, 128]> var_557_cast_fp16 = slice_by_index(begin = var_557_begin_0, end = var_557_end_0, end_mask = var_557_end_mask_0, x = coreml_update_state_17)[name = string("op_557_cast_fp16")];
tensor<int32, [1]> V_layer_cache_3_axes_0 = const()[name = string("V_layer_cache_3_axes_0"), val = tensor<int32, [1]>([0])];
tensor<fp16, [8, 1024, 128]> V_layer_cache_3_cast_fp16 = squeeze(axes = V_layer_cache_3_axes_0, x = var_557_cast_fp16)[name = string("V_layer_cache_3_cast_fp16")];
tensor<int32, [1]> x_39_axes_0 = const()[name = string("x_39_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [8, 1, 1024, 128]> x_39_cast_fp16 = expand_dims(axes = x_39_axes_0, x = K_layer_cache_3_cast_fp16)[name = string("x_39_cast_fp16")];
tensor<int32, [4]> var_566 = const()[name = string("op_566"), val = tensor<int32, [4]>([1, 3, 1, 1])];
tensor<fp16, [8, 3, 1024, 128]> x_41_cast_fp16 = tile(reps = var_566, x = x_39_cast_fp16)[name = string("x_41_cast_fp16")];
tensor<int32, [4]> var_570 = const()[name = string("op_570"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
tensor<fp16, [1, 24, 1024, 128]> var_571_cast_fp16 = reshape(shape = var_570, x = x_41_cast_fp16)[name = string("op_571_cast_fp16")];
tensor<int32, [1]> x_45_axes_0 = const()[name = string("x_45_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [8, 1, 1024, 128]> x_45_cast_fp16 = expand_dims(axes = x_45_axes_0, x = V_layer_cache_3_cast_fp16)[name = string("x_45_cast_fp16")];
tensor<int32, [4]> var_573 = const()[name = string("op_573"), val = tensor<int32, [4]>([1, 3, 1, 1])];
tensor<fp16, [8, 3, 1024, 128]> x_47_cast_fp16 = tile(reps = var_573, x = x_45_cast_fp16)[name = string("x_47_cast_fp16")];
bool var_580_transpose_x_0 = const()[name = string("op_580_transpose_x_0"), val = bool(false)];
bool var_580_transpose_y_0 = const()[name = string("op_580_transpose_y_0"), val = bool(true)];
tensor<fp16, [1, 24, 64, 1024]> var_580_cast_fp16 = matmul(transpose_x = var_580_transpose_x_0, transpose_y = var_580_transpose_y_0, x = rotated_5, y = var_571_cast_fp16)[name = string("op_580_cast_fp16")];
fp16 var_581_to_fp16 = const()[name = string("op_581_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 24, 64, 1024]> attn_weights_3_cast_fp16 = mul(x = var_580_cast_fp16, y = var_581_to_fp16)[name = string("attn_weights_3_cast_fp16")];
tensor<fp16, [1, 24, 64, 1024]> x_49_cast_fp16 = add(x = attn_weights_3_cast_fp16, y = causal_mask)[name = string("x_49_cast_fp16")];
tensor<int32, [1]> reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor<int32, [1]>([-1])];
bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 24, 64, 1]> reduce_max_1_cast_fp16 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_49_cast_fp16)[name = string("reduce_max_1_cast_fp16")];
tensor<fp16, [1, 24, 64, 1024]> x_51_cast_fp16 = sub(x = x_49_cast_fp16, y = reduce_max_1_cast_fp16)[name = string("x_51_cast_fp16")];
tensor<fp16, [1, 24, 64, 1024]> exp_x_3_cast_fp16 = exp(x = x_51_cast_fp16)[name = string("exp_x_3_cast_fp16")];
tensor<int32, [1]> var_592_axes_0 = const()[name = string("op_592_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_592_keep_dims_0 = const()[name = string("op_592_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 24, 64, 1]> var_592_cast_fp16 = reduce_sum(axes = var_592_axes_0, keep_dims = var_592_keep_dims_0, x = exp_x_3_cast_fp16)[name = string("op_592_cast_fp16")];
tensor<fp16, [1, 24, 64, 1024]> var_593_cast_fp16 = real_div(x = exp_x_3_cast_fp16, y = var_592_cast_fp16)[name = string("op_593_cast_fp16")];
tensor<int32, [3]> concat_30 = const()[name = string("concat_30"), val = tensor<int32, [3]>([24, 64, 1024])];
tensor<fp16, [24, 64, 1024]> reshape_3_cast_fp16 = reshape(shape = concat_30, x = var_593_cast_fp16)[name = string("reshape_3_cast_fp16")];
tensor<int32, [3]> concat_31 = const()[name = string("concat_31"), val = tensor<int32, [3]>([24, 1024, 128])];
tensor<fp16, [24, 1024, 128]> reshape_4_cast_fp16 = reshape(shape = concat_31, x = x_47_cast_fp16)[name = string("reshape_4_cast_fp16")];
bool matmul_1_transpose_x_0 = const()[name = string("matmul_1_transpose_x_0"), val = bool(false)];
bool matmul_1_transpose_y_0 = const()[name = string("matmul_1_transpose_y_0"), val = bool(false)];
tensor<fp16, [24, 64, 128]> matmul_1_cast_fp16 = matmul(transpose_x = matmul_1_transpose_x_0, transpose_y = matmul_1_transpose_y_0, x = reshape_3_cast_fp16, y = reshape_4_cast_fp16)[name = string("matmul_1_cast_fp16")];
tensor<int32, [4]> concat_35 = const()[name = string("concat_35"), val = tensor<int32, [4]>([1, 24, 64, 128])];
tensor<fp16, [1, 24, 64, 128]> reshape_5_cast_fp16 = reshape(shape = concat_35, x = matmul_1_cast_fp16)[name = string("reshape_5_cast_fp16")];
tensor<int32, [4]> var_596_perm_0 = const()[name = string("op_596_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_598 = const()[name = string("op_598"), val = tensor<int32, [3]>([1, 64, 3072])];
tensor<fp16, [1, 64, 24, 128]> var_596_cast_fp16 = transpose(perm = var_596_perm_0, x = reshape_5_cast_fp16)[name = string("transpose_37")];
tensor<fp16, [1, 64, 3072]> input_19_cast_fp16 = reshape(shape = var_598, x = var_596_cast_fp16)[name = string("input_19_cast_fp16")];
tensor<fp16, [3072, 3072]> model_model_layers_15_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391849792))), lut = tensor<fp16, [384, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396568448))))[name = string("model_model_layers_15_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
tensor<fp16, [1, 64, 3072]> linear_1_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_15_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_19_cast_fp16)[name = string("linear_1_cast_fp16")];
tensor<fp16, [1, 64, 3072]> hidden_states_13_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = linear_1_cast_fp16)[name = string("hidden_states_13_cast_fp16")];
tensor<int32, [1]> mean_7_axes_0 = const()[name = string("mean_7_axes_0"), val = tensor<int32, [1]>([-1])];
bool mean_7_keep_dims_0 = const()[name = string("mean_7_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 64, 1]> mean_7_cast_fp16 = reduce_mean(axes = mean_7_axes_0, keep_dims = mean_7_keep_dims_0, x = hidden_states_13_cast_fp16)[name = string("mean_7_cast_fp16")];
tensor<fp16, [1, 64, 3072]> input_21_cast_fp16 = sub(x = hidden_states_13_cast_fp16, y = mean_7_cast_fp16)[name = string("input_21_cast_fp16")];
tensor<int32, [1]> var_609_axes_0 = const()[name = string("op_609_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [3072]> model_model_layers_15_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396580800)))];
tensor<fp16, [1, 64, 3072]> var_609_cast_fp16 = layer_norm(axes = var_609_axes_0, epsilon = var_49_to_fp16, gamma = model_model_layers_15_post_attention_layernorm_weight_to_fp16, x = input_21_cast_fp16)[name = string("op_609_cast_fp16")];
tensor<int32, [3]> var_616 = const()[name = string("op_616"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [1]> input_23_axes_0 = const()[name = string("input_23_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 64]> var_617 = transpose(perm = var_616, x = var_609_cast_fp16)[name = string("transpose_36")];
tensor<fp16, [1, 3072, 1, 64]> input_23 = expand_dims(axes = input_23_axes_0, x = var_617)[name = string("input_23")];
string input_25_pad_type_0 = const()[name = string("input_25_pad_type_0"), val = string("valid")];
tensor<int32, [2]> input_25_strides_0 = const()[name = string("input_25_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> input_25_pad_0 = const()[name = string("input_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> input_25_dilations_0 = const()[name = string("input_25_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 input_25_groups_0 = const()[name = string("input_25_groups_0"), val = int32(1)];
tensor<fp16, [1, 8192, 1, 64]> input_25 = conv(dilations = input_25_dilations_0, groups = input_25_groups_0, pad = input_25_pad_0, pad_type = input_25_pad_type_0, strides = input_25_strides_0, weight = model_model_layers_15_mlp_gate_proj_weight_palettized, x = input_23)[name = string("input_25")];
string up_states_3_pad_type_0 = const()[name = string("up_states_3_pad_type_0"), val = string("valid")];
tensor<int32, [2]> up_states_3_strides_0 = const()[name = string("up_states_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> up_states_3_pad_0 = const()[name = string("up_states_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> up_states_3_dilations_0 = const()[name = string("up_states_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 up_states_3_groups_0 = const()[name = string("up_states_3_groups_0"), val = int32(1)];
tensor<fp16, [1, 8192, 1, 64]> up_states_3 = conv(dilations = up_states_3_dilations_0, groups = up_states_3_groups_0, pad = up_states_3_pad_0, pad_type = up_states_3_pad_type_0, strides = up_states_3_strides_0, weight = model_model_layers_15_mlp_up_proj_weight_palettized, x = input_23)[name = string("up_states_3")];
tensor<fp16, [1, 8192, 1, 64]> gate_states_3 = silu(x = input_25)[name = string("gate_states_3")];
tensor<fp16, [1, 8192, 1, 64]> input_27 = mul(x = gate_states_3, y = up_states_3)[name = string("input_27")];
string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")];
tensor<int32, [2]> hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)];
tensor<fp16, [1, 3072, 1, 64]> hidden_states_15 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = model_model_layers_15_mlp_down_proj_weight_palettized, x = input_27)[name = string("hidden_states_15")];
tensor<int32, [1]> var_639_axes_0 = const()[name = string("op_639_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 64]> var_639 = squeeze(axes = var_639_axes_0, x = hidden_states_15)[name = string("op_639")];
tensor<int32, [3]> var_640 = const()[name = string("op_640"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 64, 3072]> var_641 = transpose(perm = var_640, x = var_639)[name = string("transpose_35")];
tensor<fp16, [1, 64, 3072]> hidden_states_17_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = var_641)[name = string("hidden_states_17_cast_fp16")];
tensor<int32, [1]> mean_9_axes_0 = const()[name = string("mean_9_axes_0"), val = tensor<int32, [1]>([-1])];
bool mean_9_keep_dims_0 = const()[name = string("mean_9_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 64, 1]> mean_9_cast_fp16 = reduce_mean(axes = mean_9_axes_0, keep_dims = mean_9_keep_dims_0, x = hidden_states_17_cast_fp16)[name = string("mean_9_cast_fp16")];
tensor<fp16, [1, 64, 3072]> input_29_cast_fp16 = sub(x = hidden_states_17_cast_fp16, y = mean_9_cast_fp16)[name = string("input_29_cast_fp16")];
tensor<int32, [1]> var_649_axes_0 = const()[name = string("op_649_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [3072]> model_model_layers_16_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_16_input_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396587008)))];
tensor<fp16, [1, 64, 3072]> var_649_cast_fp16 = layer_norm(axes = var_649_axes_0, epsilon = var_49_to_fp16, gamma = model_model_layers_16_input_layernorm_weight_to_fp16, x = input_29_cast_fp16)[name = string("op_649_cast_fp16")];
tensor<int32, [3]> var_653 = const()[name = string("op_653"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [1]> var_655_axes_0 = const()[name = string("op_655_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 64]> var_654 = transpose(perm = var_653, x = var_649_cast_fp16)[name = string("transpose_34")];
tensor<fp16, [1, 3072, 1, 64]> var_655 = expand_dims(axes = var_655_axes_0, x = var_654)[name = string("op_655")];
string query_states_9_pad_type_0 = const()[name = string("query_states_9_pad_type_0"), val = string("valid")];
tensor<int32, [2]> query_states_9_strides_0 = const()[name = string("query_states_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> query_states_9_pad_0 = const()[name = string("query_states_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> query_states_9_dilations_0 = const()[name = string("query_states_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 query_states_9_groups_0 = const()[name = string("query_states_9_groups_0"), val = int32(1)];
tensor<fp16, [1, 3072, 1, 64]> query_states_9 = conv(dilations = query_states_9_dilations_0, groups = query_states_9_groups_0, pad = query_states_9_pad_0, pad_type = query_states_9_pad_type_0, strides = query_states_9_strides_0, weight = model_model_layers_16_self_attn_q_proj_weight_palettized, x = var_655)[name = string("query_states_9")];
string key_states_13_pad_type_0 = const()[name = string("key_states_13_pad_type_0"), val = string("valid")];
tensor<int32, [2]> key_states_13_strides_0 = const()[name = string("key_states_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> key_states_13_pad_0 = const()[name = string("key_states_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> key_states_13_dilations_0 = const()[name = string("key_states_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 key_states_13_groups_0 = const()[name = string("key_states_13_groups_0"), val = int32(1)];
tensor<fp16, [1, 1024, 1, 64]> key_states_13 = conv(dilations = key_states_13_dilations_0, groups = key_states_13_groups_0, pad = key_states_13_pad_0, pad_type = key_states_13_pad_type_0, strides = key_states_13_strides_0, weight = model_model_layers_16_self_attn_k_proj_weight_palettized, x = var_655)[name = string("key_states_13")];
string value_states_13_pad_type_0 = const()[name = string("value_states_13_pad_type_0"), val = string("valid")];
tensor<int32, [2]> value_states_13_strides_0 = const()[name = string("value_states_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> value_states_13_pad_0 = const()[name = string("value_states_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> value_states_13_dilations_0 = const()[name = string("value_states_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 value_states_13_groups_0 = const()[name = string("value_states_13_groups_0"), val = int32(1)];
tensor<fp16, [1, 1024, 1, 64]> value_states_13 = conv(dilations = value_states_13_dilations_0, groups = value_states_13_groups_0, pad = value_states_13_pad_0, pad_type = value_states_13_pad_type_0, strides = value_states_13_strides_0, weight = model_model_layers_16_self_attn_v_proj_weight_palettized, x = var_655)[name = string("value_states_13")];
tensor<int32, [4]> var_675 = const()[name = string("op_675"), val = tensor<int32, [4]>([1, 24, 128, 64])];
tensor<fp16, [1, 24, 128, 64]> var_676 = reshape(shape = var_675, x = query_states_9)[name = string("op_676")];
tensor<int32, [4]> var_677 = const()[name = string("op_677"), val = tensor<int32, [4]>([0, 1, 3, 2])];
tensor<int32, [4]> var_679 = const()[name = string("op_679"), val = tensor<int32, [4]>([1, 8, 128, 64])];
tensor<fp16, [1, 8, 128, 64]> var_680 = reshape(shape = var_679, x = key_states_13)[name = string("op_680")];
tensor<int32, [4]> var_681 = const()[name = string("op_681"), val = tensor<int32, [4]>([0, 1, 3, 2])];
tensor<int32, [4]> var_683 = const()[name = string("op_683"), val = tensor<int32, [4]>([1, 8, 128, 64])];
tensor<fp16, [1, 8, 128, 64]> var_684 = reshape(shape = var_683, x = value_states_13)[name = string("op_684")];
tensor<int32, [4]> var_685 = const()[name = string("op_685"), val = tensor<int32, [4]>([0, 1, 3, 2])];
tensor<int32, [4]> x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor<int32, [4]>([1, 24, 64, 64])];
tensor<bool, [4]> x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 24, 64, 128]> x_57 = transpose(perm = var_677, x = var_676)[name = string("transpose_33")];
tensor<fp16, [1, 24, 64, 64]> x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = x_57)[name = string("x1_9")];
tensor<int32, [4]> x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor<int32, [4]>([1, 24, 64, 128])];
tensor<bool, [4]> x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 24, 64, 64]> x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = x_57)[name = string("x2_9")];
tensor<fp16, [1, 24, 64, 64]> var_703 = mul(x = x1_9, y = cos_7)[name = string("op_703")];
tensor<fp16, [1, 24, 64, 64]> var_704 = mul(x = x2_9, y = sin_7)[name = string("op_704")];
tensor<fp16, [1, 24, 64, 64]> var_705 = sub(x = var_703, y = var_704)[name = string("op_705")];
tensor<fp16, [1, 24, 64, 64]> var_706 = mul(x = x2_9, y = cos_7)[name = string("op_706")];
tensor<fp16, [1, 24, 64, 64]> var_707 = mul(x = x1_9, y = sin_7)[name = string("op_707")];
tensor<fp16, [1, 24, 64, 64]> var_708 = add(x = var_706, y = var_707)[name = string("op_708")];
bool rotated_9_interleave_0 = const()[name = string("rotated_9_interleave_0"), val = bool(false)];
tensor<fp16, [1, 24, 64, 128]> rotated_9 = concat(axis = var_47, interleave = rotated_9_interleave_0, values = (var_705, var_708))[name = string("rotated_9")];
tensor<int32, [4]> x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor<int32, [4]>([1, 8, 64, 64])];
tensor<bool, [4]> x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 8, 64, 128]> x_61 = transpose(perm = var_681, x = var_680)[name = string("transpose_32")];
tensor<fp16, [1, 8, 64, 64]> x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = x_61)[name = string("x1_11")];
tensor<int32, [4]> x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor<int32, [4]>([1, 8, 64, 128])];
tensor<bool, [4]> x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 8, 64, 64]> x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = x_61)[name = string("x2_11")];
tensor<fp16, [1, 8, 64, 64]> var_724 = mul(x = x1_11, y = cos_7)[name = string("op_724")];
tensor<fp16, [1, 8, 64, 64]> var_725 = mul(x = x2_11, y = sin_7)[name = string("op_725")];
tensor<fp16, [1, 8, 64, 64]> var_726 = sub(x = var_724, y = var_725)[name = string("op_726")];
tensor<fp16, [1, 8, 64, 64]> var_727 = mul(x = x2_11, y = cos_7)[name = string("op_727")];
tensor<fp16, [1, 8, 64, 64]> var_728 = mul(x = x1_11, y = sin_7)[name = string("op_728")];
tensor<fp16, [1, 8, 64, 64]> var_729 = add(x = var_727, y = var_728)[name = string("op_729")];
bool rotated_11_interleave_0 = const()[name = string("rotated_11_interleave_0"), val = bool(false)];
tensor<fp16, [1, 8, 64, 128]> rotated_11 = concat(axis = var_47, interleave = rotated_11_interleave_0, values = (var_726, var_729))[name = string("rotated_11")];
tensor<int32, [1]> expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor<int32, [1]>([16])];
tensor<int32, [1]> expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor<int32, [1]>([17])];
int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)];
bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_38")];
tensor<int32, [1]> concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor<int32, [1]>([0])];
int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)];
bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_28, concat_39_values1_0, var_344, concat_39_values3_0))[name = string("concat_39")];
tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_5_stride_0, update = rotated_11, x = coreml_update_state_17)[name = string("model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16")];
write_state(data = model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_18_write_state")];
tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_18 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_18")];
tensor<int32, [1]> expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor<int32, [1]>([44])];
tensor<int32, [1]> expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor<int32, [1]>([45])];
int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)];
bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_42")];
tensor<int32, [1]> concat_43_values1_0 = const()[name = string("concat_43_values1_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor<int32, [1]>([0])];
int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)];
bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_34, concat_43_values1_0, var_344, concat_43_values3_0))[name = string("concat_43")];
tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<fp16, [1, 8, 64, 128]> value_states_15 = transpose(perm = var_685, x = var_684)[name = string("transpose_31")];
tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_6_stride_0, update = value_states_15, x = coreml_update_state_18)[name = string("model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16")];
write_state(data = model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_19_write_state")];
tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_19 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_19")];
tensor<int32, [4]> var_752_begin_0 = const()[name = string("op_752_begin_0"), val = tensor<int32, [4]>([16, 0, 0, 0])];
tensor<int32, [4]> var_752_end_0 = const()[name = string("op_752_end_0"), val = tensor<int32, [4]>([17, 8, 1024, 128])];
tensor<bool, [4]> var_752_end_mask_0 = const()[name = string("op_752_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<fp16, [1, 8, 1024, 128]> var_752_cast_fp16 = slice_by_index(begin = var_752_begin_0, end = var_752_end_0, end_mask = var_752_end_mask_0, x = coreml_update_state_19)[name = string("op_752_cast_fp16")];
tensor<int32, [1]> K_layer_cache_5_axes_0 = const()[name = string("K_layer_cache_5_axes_0"), val = tensor<int32, [1]>([0])];
tensor<fp16, [8, 1024, 128]> K_layer_cache_5_cast_fp16 = squeeze(axes = K_layer_cache_5_axes_0, x = var_752_cast_fp16)[name = string("K_layer_cache_5_cast_fp16")];
tensor<int32, [4]> var_754_begin_0 = const()[name = string("op_754_begin_0"), val = tensor<int32, [4]>([44, 0, 0, 0])];
tensor<int32, [4]> var_754_end_0 = const()[name = string("op_754_end_0"), val = tensor<int32, [4]>([45, 8, 1024, 128])];
tensor<bool, [4]> var_754_end_mask_0 = const()[name = string("op_754_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<fp16, [1, 8, 1024, 128]> var_754_cast_fp16 = slice_by_index(begin = var_754_begin_0, end = var_754_end_0, end_mask = var_754_end_mask_0, x = coreml_update_state_19)[name = string("op_754_cast_fp16")];
tensor<int32, [1]> V_layer_cache_5_axes_0 = const()[name = string("V_layer_cache_5_axes_0"), val = tensor<int32, [1]>([0])];
tensor<fp16, [8, 1024, 128]> V_layer_cache_5_cast_fp16 = squeeze(axes = V_layer_cache_5_axes_0, x = var_754_cast_fp16)[name = string("V_layer_cache_5_cast_fp16")];
tensor<int32, [1]> x_67_axes_0 = const()[name = string("x_67_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [8, 1, 1024, 128]> x_67_cast_fp16 = expand_dims(axes = x_67_axes_0, x = K_layer_cache_5_cast_fp16)[name = string("x_67_cast_fp16")];
tensor<int32, [4]> var_763 = const()[name = string("op_763"), val = tensor<int32, [4]>([1, 3, 1, 1])];
tensor<fp16, [8, 3, 1024, 128]> x_69_cast_fp16 = tile(reps = var_763, x = x_67_cast_fp16)[name = string("x_69_cast_fp16")];
tensor<int32, [4]> var_767 = const()[name = string("op_767"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
tensor<fp16, [1, 24, 1024, 128]> var_768_cast_fp16 = reshape(shape = var_767, x = x_69_cast_fp16)[name = string("op_768_cast_fp16")];
tensor<int32, [1]> x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [8, 1, 1024, 128]> x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = V_layer_cache_5_cast_fp16)[name = string("x_73_cast_fp16")];
tensor<int32, [4]> var_770 = const()[name = string("op_770"), val = tensor<int32, [4]>([1, 3, 1, 1])];
tensor<fp16, [8, 3, 1024, 128]> x_75_cast_fp16 = tile(reps = var_770, x = x_73_cast_fp16)[name = string("x_75_cast_fp16")];
bool var_777_transpose_x_0 = const()[name = string("op_777_transpose_x_0"), val = bool(false)];
bool var_777_transpose_y_0 = const()[name = string("op_777_transpose_y_0"), val = bool(true)];
tensor<fp16, [1, 24, 64, 1024]> var_777_cast_fp16 = matmul(transpose_x = var_777_transpose_x_0, transpose_y = var_777_transpose_y_0, x = rotated_9, y = var_768_cast_fp16)[name = string("op_777_cast_fp16")];
fp16 var_778_to_fp16 = const()[name = string("op_778_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 24, 64, 1024]> attn_weights_5_cast_fp16 = mul(x = var_777_cast_fp16, y = var_778_to_fp16)[name = string("attn_weights_5_cast_fp16")];
tensor<fp16, [1, 24, 64, 1024]> x_77_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask)[name = string("x_77_cast_fp16")];
tensor<int32, [1]> reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor<int32, [1]>([-1])];
bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 24, 64, 1]> reduce_max_2_cast_fp16 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_77_cast_fp16)[name = string("reduce_max_2_cast_fp16")];
tensor<fp16, [1, 24, 64, 1024]> x_79_cast_fp16 = sub(x = x_77_cast_fp16, y = reduce_max_2_cast_fp16)[name = string("x_79_cast_fp16")];
tensor<fp16, [1, 24, 64, 1024]> exp_x_5_cast_fp16 = exp(x = x_79_cast_fp16)[name = string("exp_x_5_cast_fp16")];
tensor<int32, [1]> var_789_axes_0 = const()[name = string("op_789_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_789_keep_dims_0 = const()[name = string("op_789_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 24, 64, 1]> var_789_cast_fp16 = reduce_sum(axes = var_789_axes_0, keep_dims = var_789_keep_dims_0, x = exp_x_5_cast_fp16)[name = string("op_789_cast_fp16")];
tensor<fp16, [1, 24, 64, 1024]> var_790_cast_fp16 = real_div(x = exp_x_5_cast_fp16, y = var_789_cast_fp16)[name = string("op_790_cast_fp16")];
tensor<int32, [3]> concat_48 = const()[name = string("concat_48"), val = tensor<int32, [3]>([24, 64, 1024])];
tensor<fp16, [24, 64, 1024]> reshape_6_cast_fp16 = reshape(shape = concat_48, x = var_790_cast_fp16)[name = string("reshape_6_cast_fp16")];
tensor<int32, [3]> concat_49 = const()[name = string("concat_49"), val = tensor<int32, [3]>([24, 1024, 128])];
tensor<fp16, [24, 1024, 128]> reshape_7_cast_fp16 = reshape(shape = concat_49, x = x_75_cast_fp16)[name = string("reshape_7_cast_fp16")];
bool matmul_2_transpose_x_0 = const()[name = string("matmul_2_transpose_x_0"), val = bool(false)];
bool matmul_2_transpose_y_0 = const()[name = string("matmul_2_transpose_y_0"), val = bool(false)];
tensor<fp16, [24, 64, 128]> matmul_2_cast_fp16 = matmul(transpose_x = matmul_2_transpose_x_0, transpose_y = matmul_2_transpose_y_0, x = reshape_6_cast_fp16, y = reshape_7_cast_fp16)[name = string("matmul_2_cast_fp16")];
tensor<int32, [4]> concat_53 = const()[name = string("concat_53"), val = tensor<int32, [4]>([1, 24, 64, 128])];
tensor<fp16, [1, 24, 64, 128]> reshape_8_cast_fp16 = reshape(shape = concat_53, x = matmul_2_cast_fp16)[name = string("reshape_8_cast_fp16")];
tensor<int32, [4]> var_793_perm_0 = const()[name = string("op_793_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_795 = const()[name = string("op_795"), val = tensor<int32, [3]>([1, 64, 3072])];
tensor<fp16, [1, 64, 24, 128]> var_793_cast_fp16 = transpose(perm = var_793_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_30")];
tensor<fp16, [1, 64, 3072]> input_33_cast_fp16 = reshape(shape = var_795, x = var_793_cast_fp16)[name = string("input_33_cast_fp16")];
tensor<fp16, [3072, 3072]> model_model_layers_16_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396593216))), lut = tensor<fp16, [384, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(401311872))))[name = string("model_model_layers_16_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
tensor<fp16, [1, 64, 3072]> linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_16_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_33_cast_fp16)[name = string("linear_2_cast_fp16")];
tensor<fp16, [1, 64, 3072]> hidden_states_21_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = linear_2_cast_fp16)[name = string("hidden_states_21_cast_fp16")];
tensor<int32, [1]> mean_11_axes_0 = const()[name = string("mean_11_axes_0"), val = tensor<int32, [1]>([-1])];
bool mean_11_keep_dims_0 = const()[name = string("mean_11_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 64, 1]> mean_11_cast_fp16 = reduce_mean(axes = mean_11_axes_0, keep_dims = mean_11_keep_dims_0, x = hidden_states_21_cast_fp16)[name = string("mean_11_cast_fp16")];
tensor<fp16, [1, 64, 3072]> input_35_cast_fp16 = sub(x = hidden_states_21_cast_fp16, y = mean_11_cast_fp16)[name = string("input_35_cast_fp16")];
tensor<int32, [1]> var_806_axes_0 = const()[name = string("op_806_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [3072]> model_model_layers_16_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_16_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(401324224)))];
tensor<fp16, [1, 64, 3072]> var_806_cast_fp16 = layer_norm(axes = var_806_axes_0, epsilon = var_49_to_fp16, gamma = model_model_layers_16_post_attention_layernorm_weight_to_fp16, x = input_35_cast_fp16)[name = string("op_806_cast_fp16")];
tensor<int32, [3]> var_813 = const()[name = string("op_813"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [1]> input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 64]> var_814 = transpose(perm = var_813, x = var_806_cast_fp16)[name = string("transpose_29")];
tensor<fp16, [1, 3072, 1, 64]> input_37 = expand_dims(axes = input_37_axes_0, x = var_814)[name = string("input_37")];
string input_39_pad_type_0 = const()[name = string("input_39_pad_type_0"), val = string("valid")];
tensor<int32, [2]> input_39_strides_0 = const()[name = string("input_39_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> input_39_pad_0 = const()[name = string("input_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> input_39_dilations_0 = const()[name = string("input_39_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 input_39_groups_0 = const()[name = string("input_39_groups_0"), val = int32(1)];
tensor<fp16, [1, 8192, 1, 64]> input_39 = conv(dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = model_model_layers_16_mlp_gate_proj_weight_palettized, x = input_37)[name = string("input_39")];
string up_states_5_pad_type_0 = const()[name = string("up_states_5_pad_type_0"), val = string("valid")];
tensor<int32, [2]> up_states_5_strides_0 = const()[name = string("up_states_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> up_states_5_pad_0 = const()[name = string("up_states_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> up_states_5_dilations_0 = const()[name = string("up_states_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 up_states_5_groups_0 = const()[name = string("up_states_5_groups_0"), val = int32(1)];
tensor<fp16, [1, 8192, 1, 64]> up_states_5 = conv(dilations = up_states_5_dilations_0, groups = up_states_5_groups_0, pad = up_states_5_pad_0, pad_type = up_states_5_pad_type_0, strides = up_states_5_strides_0, weight = model_model_layers_16_mlp_up_proj_weight_palettized, x = input_37)[name = string("up_states_5")];
tensor<fp16, [1, 8192, 1, 64]> gate_states_5 = silu(x = input_39)[name = string("gate_states_5")];
tensor<fp16, [1, 8192, 1, 64]> input_41 = mul(x = gate_states_5, y = up_states_5)[name = string("input_41")];
string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")];
tensor<int32, [2]> hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)];
tensor<fp16, [1, 3072, 1, 64]> hidden_states_23 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = model_model_layers_16_mlp_down_proj_weight_palettized, x = input_41)[name = string("hidden_states_23")];
tensor<int32, [1]> var_836_axes_0 = const()[name = string("op_836_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 64]> var_836 = squeeze(axes = var_836_axes_0, x = hidden_states_23)[name = string("op_836")];
tensor<int32, [3]> var_837 = const()[name = string("op_837"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 64, 3072]> var_838 = transpose(perm = var_837, x = var_836)[name = string("transpose_28")];
tensor<fp16, [1, 64, 3072]> hidden_states_25_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = var_838)[name = string("hidden_states_25_cast_fp16")];
tensor<int32, [1]> mean_13_axes_0 = const()[name = string("mean_13_axes_0"), val = tensor<int32, [1]>([-1])];
bool mean_13_keep_dims_0 = const()[name = string("mean_13_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 64, 1]> mean_13_cast_fp16 = reduce_mean(axes = mean_13_axes_0, keep_dims = mean_13_keep_dims_0, x = hidden_states_25_cast_fp16)[name = string("mean_13_cast_fp16")];
tensor<fp16, [1, 64, 3072]> input_43_cast_fp16 = sub(x = hidden_states_25_cast_fp16, y = mean_13_cast_fp16)[name = string("input_43_cast_fp16")];
tensor<int32, [1]> var_846_axes_0 = const()[name = string("op_846_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [3072]> model_model_layers_17_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_17_input_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(401330432)))];
tensor<fp16, [1, 64, 3072]> var_846_cast_fp16 = layer_norm(axes = var_846_axes_0, epsilon = var_49_to_fp16, gamma = model_model_layers_17_input_layernorm_weight_to_fp16, x = input_43_cast_fp16)[name = string("op_846_cast_fp16")];
tensor<int32, [3]> var_850 = const()[name = string("op_850"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [1]> var_852_axes_0 = const()[name = string("op_852_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 64]> var_851 = transpose(perm = var_850, x = var_846_cast_fp16)[name = string("transpose_27")];
tensor<fp16, [1, 3072, 1, 64]> var_852 = expand_dims(axes = var_852_axes_0, x = var_851)[name = string("op_852")];
string query_states_13_pad_type_0 = const()[name = string("query_states_13_pad_type_0"), val = string("valid")];
tensor<int32, [2]> query_states_13_strides_0 = const()[name = string("query_states_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> query_states_13_pad_0 = const()[name = string("query_states_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> query_states_13_dilations_0 = const()[name = string("query_states_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 query_states_13_groups_0 = const()[name = string("query_states_13_groups_0"), val = int32(1)];
tensor<fp16, [1, 3072, 1, 64]> query_states_13 = conv(dilations = query_states_13_dilations_0, groups = query_states_13_groups_0, pad = query_states_13_pad_0, pad_type = query_states_13_pad_type_0, strides = query_states_13_strides_0, weight = model_model_layers_17_self_attn_q_proj_weight_palettized, x = var_852)[name = string("query_states_13")];
string key_states_19_pad_type_0 = const()[name = string("key_states_19_pad_type_0"), val = string("valid")];
tensor<int32, [2]> key_states_19_strides_0 = const()[name = string("key_states_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> key_states_19_pad_0 = const()[name = string("key_states_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> key_states_19_dilations_0 = const()[name = string("key_states_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 key_states_19_groups_0 = const()[name = string("key_states_19_groups_0"), val = int32(1)];
tensor<fp16, [1, 1024, 1, 64]> key_states_19 = conv(dilations = key_states_19_dilations_0, groups = key_states_19_groups_0, pad = key_states_19_pad_0, pad_type = key_states_19_pad_type_0, strides = key_states_19_strides_0, weight = model_model_layers_17_self_attn_k_proj_weight_palettized, x = var_852)[name = string("key_states_19")];
string value_states_19_pad_type_0 = const()[name = string("value_states_19_pad_type_0"), val = string("valid")];
tensor<int32, [2]> value_states_19_strides_0 = const()[name = string("value_states_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> value_states_19_pad_0 = const()[name = string("value_states_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> value_states_19_dilations_0 = const()[name = string("value_states_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 value_states_19_groups_0 = const()[name = string("value_states_19_groups_0"), val = int32(1)];
tensor<fp16, [1, 1024, 1, 64]> value_states_19 = conv(dilations = value_states_19_dilations_0, groups = value_states_19_groups_0, pad = value_states_19_pad_0, pad_type = value_states_19_pad_type_0, strides = value_states_19_strides_0, weight = model_model_layers_17_self_attn_v_proj_weight_palettized, x = var_852)[name = string("value_states_19")];
tensor<int32, [4]> var_872 = const()[name = string("op_872"), val = tensor<int32, [4]>([1, 24, 128, 64])];
tensor<fp16, [1, 24, 128, 64]> var_873 = reshape(shape = var_872, x = query_states_13)[name = string("op_873")];
tensor<int32, [4]> var_874 = const()[name = string("op_874"), val = tensor<int32, [4]>([0, 1, 3, 2])];
tensor<int32, [4]> var_876 = const()[name = string("op_876"), val = tensor<int32, [4]>([1, 8, 128, 64])];
tensor<fp16, [1, 8, 128, 64]> var_877 = reshape(shape = var_876, x = key_states_19)[name = string("op_877")];
tensor<int32, [4]> var_878 = const()[name = string("op_878"), val = tensor<int32, [4]>([0, 1, 3, 2])];
tensor<int32, [4]> var_880 = const()[name = string("op_880"), val = tensor<int32, [4]>([1, 8, 128, 64])];
tensor<fp16, [1, 8, 128, 64]> var_881 = reshape(shape = var_880, x = value_states_19)[name = string("op_881")];
tensor<int32, [4]> var_882 = const()[name = string("op_882"), val = tensor<int32, [4]>([0, 1, 3, 2])];
tensor<int32, [4]> x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor<int32, [4]>([1, 24, 64, 64])];
tensor<bool, [4]> x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 24, 64, 128]> x_85 = transpose(perm = var_874, x = var_873)[name = string("transpose_26")];
tensor<fp16, [1, 24, 64, 64]> x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = x_85)[name = string("x1_13")];
tensor<int32, [4]> x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor<int32, [4]>([1, 24, 64, 128])];
tensor<bool, [4]> x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 24, 64, 64]> x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = x_85)[name = string("x2_13")];
tensor<fp16, [1, 24, 64, 64]> var_900 = mul(x = x1_13, y = cos_7)[name = string("op_900")];
tensor<fp16, [1, 24, 64, 64]> var_901 = mul(x = x2_13, y = sin_7)[name = string("op_901")];
tensor<fp16, [1, 24, 64, 64]> var_902 = sub(x = var_900, y = var_901)[name = string("op_902")];
tensor<fp16, [1, 24, 64, 64]> var_903 = mul(x = x2_13, y = cos_7)[name = string("op_903")];
tensor<fp16, [1, 24, 64, 64]> var_904 = mul(x = x1_13, y = sin_7)[name = string("op_904")];
tensor<fp16, [1, 24, 64, 64]> var_905 = add(x = var_903, y = var_904)[name = string("op_905")];
bool rotated_13_interleave_0 = const()[name = string("rotated_13_interleave_0"), val = bool(false)];
tensor<fp16, [1, 24, 64, 128]> rotated_13 = concat(axis = var_47, interleave = rotated_13_interleave_0, values = (var_902, var_905))[name = string("rotated_13")];
tensor<int32, [4]> x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor<int32, [4]>([1, 8, 64, 64])];
tensor<bool, [4]> x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 8, 64, 128]> x_89 = transpose(perm = var_878, x = var_877)[name = string("transpose_25")];
tensor<fp16, [1, 8, 64, 64]> x1_15 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = x_89)[name = string("x1_15")];
tensor<int32, [4]> x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor<int32, [4]>([1, 8, 64, 128])];
tensor<bool, [4]> x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 8, 64, 64]> x2_15 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = x_89)[name = string("x2_15")];
tensor<fp16, [1, 8, 64, 64]> var_921 = mul(x = x1_15, y = cos_7)[name = string("op_921")];
tensor<fp16, [1, 8, 64, 64]> var_922 = mul(x = x2_15, y = sin_7)[name = string("op_922")];
tensor<fp16, [1, 8, 64, 64]> var_923 = sub(x = var_921, y = var_922)[name = string("op_923")];
tensor<fp16, [1, 8, 64, 64]> var_924 = mul(x = x2_15, y = cos_7)[name = string("op_924")];
tensor<fp16, [1, 8, 64, 64]> var_925 = mul(x = x1_15, y = sin_7)[name = string("op_925")];
tensor<fp16, [1, 8, 64, 64]> var_926 = add(x = var_924, y = var_925)[name = string("op_926")];
bool rotated_15_interleave_0 = const()[name = string("rotated_15_interleave_0"), val = bool(false)];
tensor<fp16, [1, 8, 64, 128]> rotated_15 = concat(axis = var_47, interleave = rotated_15_interleave_0, values = (var_923, var_926))[name = string("rotated_15")];
tensor<int32, [1]> expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor<int32, [1]>([17])];
tensor<int32, [1]> expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor<int32, [1]>([18])];
int32 concat_56_axis_0 = const()[name = string("concat_56_axis_0"), val = int32(0)];
bool concat_56_interleave_0 = const()[name = string("concat_56_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_56 = concat(axis = concat_56_axis_0, interleave = concat_56_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_56")];
tensor<int32, [1]> concat_57_values1_0 = const()[name = string("concat_57_values1_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_57_values3_0 = const()[name = string("concat_57_values3_0"), val = tensor<int32, [1]>([0])];
int32 concat_57_axis_0 = const()[name = string("concat_57_axis_0"), val = int32(0)];
bool concat_57_interleave_0 = const()[name = string("concat_57_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_57 = concat(axis = concat_57_axis_0, interleave = concat_57_interleave_0, values = (expand_dims_40, concat_57_values1_0, var_344, concat_57_values3_0))[name = string("concat_57")];
tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_56, begin_mask = model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_57, end_mask = model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_7_stride_0, update = rotated_15, x = coreml_update_state_19)[name = string("model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16")];
write_state(data = model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_20_write_state")];
tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_20 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_20")];
tensor<int32, [1]> expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor<int32, [1]>([45])];
tensor<int32, [1]> expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor<int32, [1]>([46])];
int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)];
bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_60")];
tensor<int32, [1]> concat_61_values1_0 = const()[name = string("concat_61_values1_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_61_values3_0 = const()[name = string("concat_61_values3_0"), val = tensor<int32, [1]>([0])];
int32 concat_61_axis_0 = const()[name = string("concat_61_axis_0"), val = int32(0)];
bool concat_61_interleave_0 = const()[name = string("concat_61_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_61 = concat(axis = concat_61_axis_0, interleave = concat_61_interleave_0, values = (expand_dims_46, concat_61_values1_0, var_344, concat_61_values3_0))[name = string("concat_61")];
tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<fp16, [1, 8, 64, 128]> value_states_21 = transpose(perm = var_882, x = var_881)[name = string("transpose_24")];
tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_60, begin_mask = model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_61, end_mask = model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_8_stride_0, update = value_states_21, x = coreml_update_state_20)[name = string("model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16")];
write_state(data = model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_21_write_state")];
tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_21 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_21")];
tensor<int32, [4]> var_949_begin_0 = const()[name = string("op_949_begin_0"), val = tensor<int32, [4]>([17, 0, 0, 0])];
tensor<int32, [4]> var_949_end_0 = const()[name = string("op_949_end_0"), val = tensor<int32, [4]>([18, 8, 1024, 128])];
tensor<bool, [4]> var_949_end_mask_0 = const()[name = string("op_949_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<fp16, [1, 8, 1024, 128]> var_949_cast_fp16 = slice_by_index(begin = var_949_begin_0, end = var_949_end_0, end_mask = var_949_end_mask_0, x = coreml_update_state_21)[name = string("op_949_cast_fp16")];
tensor<int32, [1]> K_layer_cache_7_axes_0 = const()[name = string("K_layer_cache_7_axes_0"), val = tensor<int32, [1]>([0])];
tensor<fp16, [8, 1024, 128]> K_layer_cache_7_cast_fp16 = squeeze(axes = K_layer_cache_7_axes_0, x = var_949_cast_fp16)[name = string("K_layer_cache_7_cast_fp16")];
tensor<int32, [4]> var_951_begin_0 = const()[name = string("op_951_begin_0"), val = tensor<int32, [4]>([45, 0, 0, 0])];
tensor<int32, [4]> var_951_end_0 = const()[name = string("op_951_end_0"), val = tensor<int32, [4]>([46, 8, 1024, 128])];
tensor<bool, [4]> var_951_end_mask_0 = const()[name = string("op_951_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<fp16, [1, 8, 1024, 128]> var_951_cast_fp16 = slice_by_index(begin = var_951_begin_0, end = var_951_end_0, end_mask = var_951_end_mask_0, x = coreml_update_state_21)[name = string("op_951_cast_fp16")];
tensor<int32, [1]> V_layer_cache_7_axes_0 = const()[name = string("V_layer_cache_7_axes_0"), val = tensor<int32, [1]>([0])];
tensor<fp16, [8, 1024, 128]> V_layer_cache_7_cast_fp16 = squeeze(axes = V_layer_cache_7_axes_0, x = var_951_cast_fp16)[name = string("V_layer_cache_7_cast_fp16")];
tensor<int32, [1]> x_95_axes_0 = const()[name = string("x_95_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [8, 1, 1024, 128]> x_95_cast_fp16 = expand_dims(axes = x_95_axes_0, x = K_layer_cache_7_cast_fp16)[name = string("x_95_cast_fp16")];
tensor<int32, [4]> var_960 = const()[name = string("op_960"), val = tensor<int32, [4]>([1, 3, 1, 1])];
tensor<fp16, [8, 3, 1024, 128]> x_97_cast_fp16 = tile(reps = var_960, x = x_95_cast_fp16)[name = string("x_97_cast_fp16")];
tensor<int32, [4]> var_964 = const()[name = string("op_964"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
tensor<fp16, [1, 24, 1024, 128]> var_965_cast_fp16 = reshape(shape = var_964, x = x_97_cast_fp16)[name = string("op_965_cast_fp16")];
tensor<int32, [1]> x_101_axes_0 = const()[name = string("x_101_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [8, 1, 1024, 128]> x_101_cast_fp16 = expand_dims(axes = x_101_axes_0, x = V_layer_cache_7_cast_fp16)[name = string("x_101_cast_fp16")];
tensor<int32, [4]> var_967 = const()[name = string("op_967"), val = tensor<int32, [4]>([1, 3, 1, 1])];
tensor<fp16, [8, 3, 1024, 128]> x_103_cast_fp16 = tile(reps = var_967, x = x_101_cast_fp16)[name = string("x_103_cast_fp16")];
bool var_974_transpose_x_0 = const()[name = string("op_974_transpose_x_0"), val = bool(false)];
bool var_974_transpose_y_0 = const()[name = string("op_974_transpose_y_0"), val = bool(true)];
tensor<fp16, [1, 24, 64, 1024]> var_974_cast_fp16 = matmul(transpose_x = var_974_transpose_x_0, transpose_y = var_974_transpose_y_0, x = rotated_13, y = var_965_cast_fp16)[name = string("op_974_cast_fp16")];
fp16 var_975_to_fp16 = const()[name = string("op_975_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 24, 64, 1024]> attn_weights_7_cast_fp16 = mul(x = var_974_cast_fp16, y = var_975_to_fp16)[name = string("attn_weights_7_cast_fp16")];
tensor<fp16, [1, 24, 64, 1024]> x_105_cast_fp16 = add(x = attn_weights_7_cast_fp16, y = causal_mask)[name = string("x_105_cast_fp16")];
tensor<int32, [1]> reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor<int32, [1]>([-1])];
bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 24, 64, 1]> reduce_max_3_cast_fp16 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_105_cast_fp16)[name = string("reduce_max_3_cast_fp16")];
tensor<fp16, [1, 24, 64, 1024]> x_107_cast_fp16 = sub(x = x_105_cast_fp16, y = reduce_max_3_cast_fp16)[name = string("x_107_cast_fp16")];
tensor<fp16, [1, 24, 64, 1024]> exp_x_7_cast_fp16 = exp(x = x_107_cast_fp16)[name = string("exp_x_7_cast_fp16")];
tensor<int32, [1]> var_986_axes_0 = const()[name = string("op_986_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_986_keep_dims_0 = const()[name = string("op_986_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 24, 64, 1]> var_986_cast_fp16 = reduce_sum(axes = var_986_axes_0, keep_dims = var_986_keep_dims_0, x = exp_x_7_cast_fp16)[name = string("op_986_cast_fp16")];
tensor<fp16, [1, 24, 64, 1024]> var_987_cast_fp16 = real_div(x = exp_x_7_cast_fp16, y = var_986_cast_fp16)[name = string("op_987_cast_fp16")];
tensor<int32, [3]> concat_66 = const()[name = string("concat_66"), val = tensor<int32, [3]>([24, 64, 1024])];
tensor<fp16, [24, 64, 1024]> reshape_9_cast_fp16 = reshape(shape = concat_66, x = var_987_cast_fp16)[name = string("reshape_9_cast_fp16")];
tensor<int32, [3]> concat_67 = const()[name = string("concat_67"), val = tensor<int32, [3]>([24, 1024, 128])];
tensor<fp16, [24, 1024, 128]> reshape_10_cast_fp16 = reshape(shape = concat_67, x = x_103_cast_fp16)[name = string("reshape_10_cast_fp16")];
bool matmul_3_transpose_x_0 = const()[name = string("matmul_3_transpose_x_0"), val = bool(false)];
bool matmul_3_transpose_y_0 = const()[name = string("matmul_3_transpose_y_0"), val = bool(false)];
tensor<fp16, [24, 64, 128]> matmul_3_cast_fp16 = matmul(transpose_x = matmul_3_transpose_x_0, transpose_y = matmul_3_transpose_y_0, x = reshape_9_cast_fp16, y = reshape_10_cast_fp16)[name = string("matmul_3_cast_fp16")];
tensor<int32, [4]> concat_71 = const()[name = string("concat_71"), val = tensor<int32, [4]>([1, 24, 64, 128])];
tensor<fp16, [1, 24, 64, 128]> reshape_11_cast_fp16 = reshape(shape = concat_71, x = matmul_3_cast_fp16)[name = string("reshape_11_cast_fp16")];
tensor<int32, [4]> var_990_perm_0 = const()[name = string("op_990_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_992 = const()[name = string("op_992"), val = tensor<int32, [3]>([1, 64, 3072])];
tensor<fp16, [1, 64, 24, 128]> var_990_cast_fp16 = transpose(perm = var_990_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_23")];
tensor<fp16, [1, 64, 3072]> input_47_cast_fp16 = reshape(shape = var_992, x = var_990_cast_fp16)[name = string("input_47_cast_fp16")];
tensor<fp16, [3072, 3072]> model_model_layers_17_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(401336640))), lut = tensor<fp16, [384, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406055296))))[name = string("model_model_layers_17_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
tensor<fp16, [1, 64, 3072]> linear_3_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_17_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_47_cast_fp16)[name = string("linear_3_cast_fp16")];
tensor<fp16, [1, 64, 3072]> hidden_states_29_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = linear_3_cast_fp16)[name = string("hidden_states_29_cast_fp16")];
tensor<int32, [1]> mean_15_axes_0 = const()[name = string("mean_15_axes_0"), val = tensor<int32, [1]>([-1])];
bool mean_15_keep_dims_0 = const()[name = string("mean_15_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 64, 1]> mean_15_cast_fp16 = reduce_mean(axes = mean_15_axes_0, keep_dims = mean_15_keep_dims_0, x = hidden_states_29_cast_fp16)[name = string("mean_15_cast_fp16")];
tensor<fp16, [1, 64, 3072]> input_49_cast_fp16 = sub(x = hidden_states_29_cast_fp16, y = mean_15_cast_fp16)[name = string("input_49_cast_fp16")];
tensor<int32, [1]> var_1003_axes_0 = const()[name = string("op_1003_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [3072]> model_model_layers_17_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_17_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406067648)))];
tensor<fp16, [1, 64, 3072]> var_1003_cast_fp16 = layer_norm(axes = var_1003_axes_0, epsilon = var_49_to_fp16, gamma = model_model_layers_17_post_attention_layernorm_weight_to_fp16, x = input_49_cast_fp16)[name = string("op_1003_cast_fp16")];
tensor<int32, [3]> var_1010 = const()[name = string("op_1010"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [1]> input_51_axes_0 = const()[name = string("input_51_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 64]> var_1011 = transpose(perm = var_1010, x = var_1003_cast_fp16)[name = string("transpose_22")];
tensor<fp16, [1, 3072, 1, 64]> input_51 = expand_dims(axes = input_51_axes_0, x = var_1011)[name = string("input_51")];
string input_53_pad_type_0 = const()[name = string("input_53_pad_type_0"), val = string("valid")];
tensor<int32, [2]> input_53_strides_0 = const()[name = string("input_53_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> input_53_pad_0 = const()[name = string("input_53_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> input_53_dilations_0 = const()[name = string("input_53_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 input_53_groups_0 = const()[name = string("input_53_groups_0"), val = int32(1)];
tensor<fp16, [1, 8192, 1, 64]> input_53 = conv(dilations = input_53_dilations_0, groups = input_53_groups_0, pad = input_53_pad_0, pad_type = input_53_pad_type_0, strides = input_53_strides_0, weight = model_model_layers_17_mlp_gate_proj_weight_palettized, x = input_51)[name = string("input_53")];
string up_states_7_pad_type_0 = const()[name = string("up_states_7_pad_type_0"), val = string("valid")];
tensor<int32, [2]> up_states_7_strides_0 = const()[name = string("up_states_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> up_states_7_pad_0 = const()[name = string("up_states_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> up_states_7_dilations_0 = const()[name = string("up_states_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 up_states_7_groups_0 = const()[name = string("up_states_7_groups_0"), val = int32(1)];
tensor<fp16, [1, 8192, 1, 64]> up_states_7 = conv(dilations = up_states_7_dilations_0, groups = up_states_7_groups_0, pad = up_states_7_pad_0, pad_type = up_states_7_pad_type_0, strides = up_states_7_strides_0, weight = model_model_layers_17_mlp_up_proj_weight_palettized, x = input_51)[name = string("up_states_7")];
tensor<fp16, [1, 8192, 1, 64]> gate_states_7 = silu(x = input_53)[name = string("gate_states_7")];
tensor<fp16, [1, 8192, 1, 64]> input_55 = mul(x = gate_states_7, y = up_states_7)[name = string("input_55")];
string hidden_states_31_pad_type_0 = const()[name = string("hidden_states_31_pad_type_0"), val = string("valid")];
tensor<int32, [2]> hidden_states_31_strides_0 = const()[name = string("hidden_states_31_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> hidden_states_31_pad_0 = const()[name = string("hidden_states_31_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> hidden_states_31_dilations_0 = const()[name = string("hidden_states_31_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 hidden_states_31_groups_0 = const()[name = string("hidden_states_31_groups_0"), val = int32(1)];
tensor<fp16, [1, 3072, 1, 64]> hidden_states_31 = conv(dilations = hidden_states_31_dilations_0, groups = hidden_states_31_groups_0, pad = hidden_states_31_pad_0, pad_type = hidden_states_31_pad_type_0, strides = hidden_states_31_strides_0, weight = model_model_layers_17_mlp_down_proj_weight_palettized, x = input_55)[name = string("hidden_states_31")];
tensor<int32, [1]> var_1033_axes_0 = const()[name = string("op_1033_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 64]> var_1033 = squeeze(axes = var_1033_axes_0, x = hidden_states_31)[name = string("op_1033")];
tensor<int32, [3]> var_1034 = const()[name = string("op_1034"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 64, 3072]> var_1035 = transpose(perm = var_1034, x = var_1033)[name = string("transpose_21")];
tensor<fp16, [1, 64, 3072]> hidden_states_33_cast_fp16 = add(x = hidden_states_29_cast_fp16, y = var_1035)[name = string("hidden_states_33_cast_fp16")];
tensor<int32, [1]> mean_17_axes_0 = const()[name = string("mean_17_axes_0"), val = tensor<int32, [1]>([-1])];
bool mean_17_keep_dims_0 = const()[name = string("mean_17_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 64, 1]> mean_17_cast_fp16 = reduce_mean(axes = mean_17_axes_0, keep_dims = mean_17_keep_dims_0, x = hidden_states_33_cast_fp16)[name = string("mean_17_cast_fp16")];
tensor<fp16, [1, 64, 3072]> input_57_cast_fp16 = sub(x = hidden_states_33_cast_fp16, y = mean_17_cast_fp16)[name = string("input_57_cast_fp16")];
tensor<int32, [1]> var_1043_axes_0 = const()[name = string("op_1043_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [3072]> model_model_layers_18_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_18_input_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406073856)))];
tensor<fp16, [1, 64, 3072]> var_1043_cast_fp16 = layer_norm(axes = var_1043_axes_0, epsilon = var_49_to_fp16, gamma = model_model_layers_18_input_layernorm_weight_to_fp16, x = input_57_cast_fp16)[name = string("op_1043_cast_fp16")];
tensor<int32, [3]> var_1047 = const()[name = string("op_1047"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [1]> var_1049_axes_0 = const()[name = string("op_1049_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 64]> var_1048 = transpose(perm = var_1047, x = var_1043_cast_fp16)[name = string("transpose_20")];
tensor<fp16, [1, 3072, 1, 64]> var_1049 = expand_dims(axes = var_1049_axes_0, x = var_1048)[name = string("op_1049")];
string query_states_17_pad_type_0 = const()[name = string("query_states_17_pad_type_0"), val = string("valid")];
tensor<int32, [2]> query_states_17_strides_0 = const()[name = string("query_states_17_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> query_states_17_pad_0 = const()[name = string("query_states_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> query_states_17_dilations_0 = const()[name = string("query_states_17_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 query_states_17_groups_0 = const()[name = string("query_states_17_groups_0"), val = int32(1)];
tensor<fp16, [1, 3072, 1, 64]> query_states_17 = conv(dilations = query_states_17_dilations_0, groups = query_states_17_groups_0, pad = query_states_17_pad_0, pad_type = query_states_17_pad_type_0, strides = query_states_17_strides_0, weight = model_model_layers_18_self_attn_q_proj_weight_palettized, x = var_1049)[name = string("query_states_17")];
string key_states_25_pad_type_0 = const()[name = string("key_states_25_pad_type_0"), val = string("valid")];
tensor<int32, [2]> key_states_25_strides_0 = const()[name = string("key_states_25_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> key_states_25_pad_0 = const()[name = string("key_states_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> key_states_25_dilations_0 = const()[name = string("key_states_25_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 key_states_25_groups_0 = const()[name = string("key_states_25_groups_0"), val = int32(1)];
tensor<fp16, [1, 1024, 1, 64]> key_states_25 = conv(dilations = key_states_25_dilations_0, groups = key_states_25_groups_0, pad = key_states_25_pad_0, pad_type = key_states_25_pad_type_0, strides = key_states_25_strides_0, weight = model_model_layers_18_self_attn_k_proj_weight_palettized, x = var_1049)[name = string("key_states_25")];
string value_states_25_pad_type_0 = const()[name = string("value_states_25_pad_type_0"), val = string("valid")];
tensor<int32, [2]> value_states_25_strides_0 = const()[name = string("value_states_25_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> value_states_25_pad_0 = const()[name = string("value_states_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> value_states_25_dilations_0 = const()[name = string("value_states_25_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 value_states_25_groups_0 = const()[name = string("value_states_25_groups_0"), val = int32(1)];
tensor<fp16, [1, 1024, 1, 64]> value_states_25 = conv(dilations = value_states_25_dilations_0, groups = value_states_25_groups_0, pad = value_states_25_pad_0, pad_type = value_states_25_pad_type_0, strides = value_states_25_strides_0, weight = model_model_layers_18_self_attn_v_proj_weight_palettized, x = var_1049)[name = string("value_states_25")];
tensor<int32, [4]> var_1069 = const()[name = string("op_1069"), val = tensor<int32, [4]>([1, 24, 128, 64])];
tensor<fp16, [1, 24, 128, 64]> var_1070 = reshape(shape = var_1069, x = query_states_17)[name = string("op_1070")];
tensor<int32, [4]> var_1071 = const()[name = string("op_1071"), val = tensor<int32, [4]>([0, 1, 3, 2])];
tensor<int32, [4]> var_1073 = const()[name = string("op_1073"), val = tensor<int32, [4]>([1, 8, 128, 64])];
tensor<fp16, [1, 8, 128, 64]> var_1074 = reshape(shape = var_1073, x = key_states_25)[name = string("op_1074")];
tensor<int32, [4]> var_1075 = const()[name = string("op_1075"), val = tensor<int32, [4]>([0, 1, 3, 2])];
tensor<int32, [4]> var_1077 = const()[name = string("op_1077"), val = tensor<int32, [4]>([1, 8, 128, 64])];
tensor<fp16, [1, 8, 128, 64]> var_1078 = reshape(shape = var_1077, x = value_states_25)[name = string("op_1078")];
tensor<int32, [4]> var_1079 = const()[name = string("op_1079"), val = tensor<int32, [4]>([0, 1, 3, 2])];
tensor<int32, [4]> x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor<int32, [4]>([1, 24, 64, 64])];
tensor<bool, [4]> x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 24, 64, 128]> x_113 = transpose(perm = var_1071, x = var_1070)[name = string("transpose_19")];
tensor<fp16, [1, 24, 64, 64]> x1_17 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = x_113)[name = string("x1_17")];
tensor<int32, [4]> x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor<int32, [4]>([1, 24, 64, 128])];
tensor<bool, [4]> x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 24, 64, 64]> x2_17 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = x_113)[name = string("x2_17")];
tensor<fp16, [1, 24, 64, 64]> var_1097 = mul(x = x1_17, y = cos_7)[name = string("op_1097")];
tensor<fp16, [1, 24, 64, 64]> var_1098 = mul(x = x2_17, y = sin_7)[name = string("op_1098")];
tensor<fp16, [1, 24, 64, 64]> var_1099 = sub(x = var_1097, y = var_1098)[name = string("op_1099")];
tensor<fp16, [1, 24, 64, 64]> var_1100 = mul(x = x2_17, y = cos_7)[name = string("op_1100")];
tensor<fp16, [1, 24, 64, 64]> var_1101 = mul(x = x1_17, y = sin_7)[name = string("op_1101")];
tensor<fp16, [1, 24, 64, 64]> var_1102 = add(x = var_1100, y = var_1101)[name = string("op_1102")];
bool rotated_17_interleave_0 = const()[name = string("rotated_17_interleave_0"), val = bool(false)];
tensor<fp16, [1, 24, 64, 128]> rotated_17 = concat(axis = var_47, interleave = rotated_17_interleave_0, values = (var_1099, var_1102))[name = string("rotated_17")];
tensor<int32, [4]> x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor<int32, [4]>([1, 8, 64, 64])];
tensor<bool, [4]> x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 8, 64, 128]> x_117 = transpose(perm = var_1075, x = var_1074)[name = string("transpose_18")];
tensor<fp16, [1, 8, 64, 64]> x1_19 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = x_117)[name = string("x1_19")];
tensor<int32, [4]> x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor<int32, [4]>([1, 8, 64, 128])];
tensor<bool, [4]> x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 8, 64, 64]> x2_19 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = x_117)[name = string("x2_19")];
tensor<fp16, [1, 8, 64, 64]> var_1118 = mul(x = x1_19, y = cos_7)[name = string("op_1118")];
tensor<fp16, [1, 8, 64, 64]> var_1119 = mul(x = x2_19, y = sin_7)[name = string("op_1119")];
tensor<fp16, [1, 8, 64, 64]> var_1120 = sub(x = var_1118, y = var_1119)[name = string("op_1120")];
tensor<fp16, [1, 8, 64, 64]> var_1121 = mul(x = x2_19, y = cos_7)[name = string("op_1121")];
tensor<fp16, [1, 8, 64, 64]> var_1122 = mul(x = x1_19, y = sin_7)[name = string("op_1122")];
tensor<fp16, [1, 8, 64, 64]> var_1123 = add(x = var_1121, y = var_1122)[name = string("op_1123")];
bool rotated_19_interleave_0 = const()[name = string("rotated_19_interleave_0"), val = bool(false)];
tensor<fp16, [1, 8, 64, 128]> rotated_19 = concat(axis = var_47, interleave = rotated_19_interleave_0, values = (var_1120, var_1123))[name = string("rotated_19")];
tensor<int32, [1]> expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor<int32, [1]>([18])];
tensor<int32, [1]> expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor<int32, [1]>([19])];
int32 concat_74_axis_0 = const()[name = string("concat_74_axis_0"), val = int32(0)];
bool concat_74_interleave_0 = const()[name = string("concat_74_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_74 = concat(axis = concat_74_axis_0, interleave = concat_74_interleave_0, values = (expand_dims_48, expand_dims_49, current_pos, expand_dims_51))[name = string("concat_74")];
tensor<int32, [1]> concat_75_values1_0 = const()[name = string("concat_75_values1_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor<int32, [1]>([0])];
int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)];
bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (expand_dims_52, concat_75_values1_0, var_344, concat_75_values3_0))[name = string("concat_75")];
tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_74, begin_mask = model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0, end = concat_75, end_mask = model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_9_stride_0, update = rotated_19, x = coreml_update_state_21)[name = string("model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16")];
write_state(data = model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_22_write_state")];
tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_22 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_22")];
tensor<int32, [1]> expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor<int32, [1]>([46])];
tensor<int32, [1]> expand_dims_55 = const()[name = string("expand_dims_55"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_58 = const()[name = string("expand_dims_58"), val = tensor<int32, [1]>([47])];
int32 concat_78_axis_0 = const()[name = string("concat_78_axis_0"), val = int32(0)];
bool concat_78_interleave_0 = const()[name = string("concat_78_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_78 = concat(axis = concat_78_axis_0, interleave = concat_78_interleave_0, values = (expand_dims_54, expand_dims_55, current_pos, expand_dims_57))[name = string("concat_78")];
tensor<int32, [1]> concat_79_values1_0 = const()[name = string("concat_79_values1_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_79_values3_0 = const()[name = string("concat_79_values3_0"), val = tensor<int32, [1]>([0])];
int32 concat_79_axis_0 = const()[name = string("concat_79_axis_0"), val = int32(0)];
bool concat_79_interleave_0 = const()[name = string("concat_79_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_79 = concat(axis = concat_79_axis_0, interleave = concat_79_interleave_0, values = (expand_dims_58, concat_79_values1_0, var_344, concat_79_values3_0))[name = string("concat_79")];
tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<fp16, [1, 8, 64, 128]> value_states_27 = transpose(perm = var_1079, x = var_1078)[name = string("transpose_17")];
tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_78, begin_mask = model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0, end = concat_79, end_mask = model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_10_stride_0, update = value_states_27, x = coreml_update_state_22)[name = string("model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16")];
write_state(data = model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_23_write_state")];
tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_23 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_23")];
tensor<int32, [4]> var_1146_begin_0 = const()[name = string("op_1146_begin_0"), val = tensor<int32, [4]>([18, 0, 0, 0])];
tensor<int32, [4]> var_1146_end_0 = const()[name = string("op_1146_end_0"), val = tensor<int32, [4]>([19, 8, 1024, 128])];
tensor<bool, [4]> var_1146_end_mask_0 = const()[name = string("op_1146_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<fp16, [1, 8, 1024, 128]> var_1146_cast_fp16 = slice_by_index(begin = var_1146_begin_0, end = var_1146_end_0, end_mask = var_1146_end_mask_0, x = coreml_update_state_23)[name = string("op_1146_cast_fp16")];
tensor<int32, [1]> K_layer_cache_9_axes_0 = const()[name = string("K_layer_cache_9_axes_0"), val = tensor<int32, [1]>([0])];
tensor<fp16, [8, 1024, 128]> K_layer_cache_9_cast_fp16 = squeeze(axes = K_layer_cache_9_axes_0, x = var_1146_cast_fp16)[name = string("K_layer_cache_9_cast_fp16")];
tensor<int32, [4]> var_1148_begin_0 = const()[name = string("op_1148_begin_0"), val = tensor<int32, [4]>([46, 0, 0, 0])];
tensor<int32, [4]> var_1148_end_0 = const()[name = string("op_1148_end_0"), val = tensor<int32, [4]>([47, 8, 1024, 128])];
tensor<bool, [4]> var_1148_end_mask_0 = const()[name = string("op_1148_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<fp16, [1, 8, 1024, 128]> var_1148_cast_fp16 = slice_by_index(begin = var_1148_begin_0, end = var_1148_end_0, end_mask = var_1148_end_mask_0, x = coreml_update_state_23)[name = string("op_1148_cast_fp16")];
tensor<int32, [1]> V_layer_cache_9_axes_0 = const()[name = string("V_layer_cache_9_axes_0"), val = tensor<int32, [1]>([0])];
tensor<fp16, [8, 1024, 128]> V_layer_cache_9_cast_fp16 = squeeze(axes = V_layer_cache_9_axes_0, x = var_1148_cast_fp16)[name = string("V_layer_cache_9_cast_fp16")];
tensor<int32, [1]> x_123_axes_0 = const()[name = string("x_123_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [8, 1, 1024, 128]> x_123_cast_fp16 = expand_dims(axes = x_123_axes_0, x = K_layer_cache_9_cast_fp16)[name = string("x_123_cast_fp16")];
tensor<int32, [4]> var_1157 = const()[name = string("op_1157"), val = tensor<int32, [4]>([1, 3, 1, 1])];
tensor<fp16, [8, 3, 1024, 128]> x_125_cast_fp16 = tile(reps = var_1157, x = x_123_cast_fp16)[name = string("x_125_cast_fp16")];
tensor<int32, [4]> var_1161 = const()[name = string("op_1161"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
tensor<fp16, [1, 24, 1024, 128]> var_1162_cast_fp16 = reshape(shape = var_1161, x = x_125_cast_fp16)[name = string("op_1162_cast_fp16")];
tensor<int32, [1]> x_129_axes_0 = const()[name = string("x_129_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [8, 1, 1024, 128]> x_129_cast_fp16 = expand_dims(axes = x_129_axes_0, x = V_layer_cache_9_cast_fp16)[name = string("x_129_cast_fp16")];
tensor<int32, [4]> var_1164 = const()[name = string("op_1164"), val = tensor<int32, [4]>([1, 3, 1, 1])];
tensor<fp16, [8, 3, 1024, 128]> x_131_cast_fp16 = tile(reps = var_1164, x = x_129_cast_fp16)[name = string("x_131_cast_fp16")];
bool var_1171_transpose_x_0 = const()[name = string("op_1171_transpose_x_0"), val = bool(false)];
bool var_1171_transpose_y_0 = const()[name = string("op_1171_transpose_y_0"), val = bool(true)];
tensor<fp16, [1, 24, 64, 1024]> var_1171_cast_fp16 = matmul(transpose_x = var_1171_transpose_x_0, transpose_y = var_1171_transpose_y_0, x = rotated_17, y = var_1162_cast_fp16)[name = string("op_1171_cast_fp16")];
fp16 var_1172_to_fp16 = const()[name = string("op_1172_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 24, 64, 1024]> attn_weights_9_cast_fp16 = mul(x = var_1171_cast_fp16, y = var_1172_to_fp16)[name = string("attn_weights_9_cast_fp16")];
tensor<fp16, [1, 24, 64, 1024]> x_133_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask)[name = string("x_133_cast_fp16")];
tensor<int32, [1]> reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor<int32, [1]>([-1])];
bool reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 24, 64, 1]> reduce_max_4_cast_fp16 = reduce_max(axes = reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = x_133_cast_fp16)[name = string("reduce_max_4_cast_fp16")];
tensor<fp16, [1, 24, 64, 1024]> x_135_cast_fp16 = sub(x = x_133_cast_fp16, y = reduce_max_4_cast_fp16)[name = string("x_135_cast_fp16")];
tensor<fp16, [1, 24, 64, 1024]> exp_x_9_cast_fp16 = exp(x = x_135_cast_fp16)[name = string("exp_x_9_cast_fp16")];
tensor<int32, [1]> var_1183_axes_0 = const()[name = string("op_1183_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_1183_keep_dims_0 = const()[name = string("op_1183_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 24, 64, 1]> var_1183_cast_fp16 = reduce_sum(axes = var_1183_axes_0, keep_dims = var_1183_keep_dims_0, x = exp_x_9_cast_fp16)[name = string("op_1183_cast_fp16")];
tensor<fp16, [1, 24, 64, 1024]> var_1184_cast_fp16 = real_div(x = exp_x_9_cast_fp16, y = var_1183_cast_fp16)[name = string("op_1184_cast_fp16")];
tensor<int32, [3]> concat_84 = const()[name = string("concat_84"), val = tensor<int32, [3]>([24, 64, 1024])];
tensor<fp16, [24, 64, 1024]> reshape_12_cast_fp16 = reshape(shape = concat_84, x = var_1184_cast_fp16)[name = string("reshape_12_cast_fp16")];
tensor<int32, [3]> concat_85 = const()[name = string("concat_85"), val = tensor<int32, [3]>([24, 1024, 128])];
tensor<fp16, [24, 1024, 128]> reshape_13_cast_fp16 = reshape(shape = concat_85, x = x_131_cast_fp16)[name = string("reshape_13_cast_fp16")];
bool matmul_4_transpose_x_0 = const()[name = string("matmul_4_transpose_x_0"), val = bool(false)];
bool matmul_4_transpose_y_0 = const()[name = string("matmul_4_transpose_y_0"), val = bool(false)];
tensor<fp16, [24, 64, 128]> matmul_4_cast_fp16 = matmul(transpose_x = matmul_4_transpose_x_0, transpose_y = matmul_4_transpose_y_0, x = reshape_12_cast_fp16, y = reshape_13_cast_fp16)[name = string("matmul_4_cast_fp16")];
tensor<int32, [4]> concat_89 = const()[name = string("concat_89"), val = tensor<int32, [4]>([1, 24, 64, 128])];
tensor<fp16, [1, 24, 64, 128]> reshape_14_cast_fp16 = reshape(shape = concat_89, x = matmul_4_cast_fp16)[name = string("reshape_14_cast_fp16")];
tensor<int32, [4]> var_1187_perm_0 = const()[name = string("op_1187_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1189 = const()[name = string("op_1189"), val = tensor<int32, [3]>([1, 64, 3072])];
tensor<fp16, [1, 64, 24, 128]> var_1187_cast_fp16 = transpose(perm = var_1187_perm_0, x = reshape_14_cast_fp16)[name = string("transpose_16")];
tensor<fp16, [1, 64, 3072]> input_61_cast_fp16 = reshape(shape = var_1189, x = var_1187_cast_fp16)[name = string("input_61_cast_fp16")];
tensor<fp16, [3072, 3072]> model_model_layers_18_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(406080064))), lut = tensor<fp16, [384, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410798720))))[name = string("model_model_layers_18_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
tensor<fp16, [1, 64, 3072]> linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_18_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_61_cast_fp16)[name = string("linear_4_cast_fp16")];
tensor<fp16, [1, 64, 3072]> hidden_states_37_cast_fp16 = add(x = hidden_states_33_cast_fp16, y = linear_4_cast_fp16)[name = string("hidden_states_37_cast_fp16")];
tensor<int32, [1]> mean_19_axes_0 = const()[name = string("mean_19_axes_0"), val = tensor<int32, [1]>([-1])];
bool mean_19_keep_dims_0 = const()[name = string("mean_19_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 64, 1]> mean_19_cast_fp16 = reduce_mean(axes = mean_19_axes_0, keep_dims = mean_19_keep_dims_0, x = hidden_states_37_cast_fp16)[name = string("mean_19_cast_fp16")];
tensor<fp16, [1, 64, 3072]> input_63_cast_fp16 = sub(x = hidden_states_37_cast_fp16, y = mean_19_cast_fp16)[name = string("input_63_cast_fp16")];
tensor<int32, [1]> var_1200_axes_0 = const()[name = string("op_1200_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [3072]> model_model_layers_18_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_18_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410811072)))];
tensor<fp16, [1, 64, 3072]> var_1200_cast_fp16 = layer_norm(axes = var_1200_axes_0, epsilon = var_49_to_fp16, gamma = model_model_layers_18_post_attention_layernorm_weight_to_fp16, x = input_63_cast_fp16)[name = string("op_1200_cast_fp16")];
tensor<int32, [3]> var_1207 = const()[name = string("op_1207"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [1]> input_65_axes_0 = const()[name = string("input_65_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 64]> var_1208 = transpose(perm = var_1207, x = var_1200_cast_fp16)[name = string("transpose_15")];
tensor<fp16, [1, 3072, 1, 64]> input_65 = expand_dims(axes = input_65_axes_0, x = var_1208)[name = string("input_65")];
string input_67_pad_type_0 = const()[name = string("input_67_pad_type_0"), val = string("valid")];
tensor<int32, [2]> input_67_strides_0 = const()[name = string("input_67_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> input_67_pad_0 = const()[name = string("input_67_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> input_67_dilations_0 = const()[name = string("input_67_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 input_67_groups_0 = const()[name = string("input_67_groups_0"), val = int32(1)];
tensor<fp16, [1, 8192, 1, 64]> input_67 = conv(dilations = input_67_dilations_0, groups = input_67_groups_0, pad = input_67_pad_0, pad_type = input_67_pad_type_0, strides = input_67_strides_0, weight = model_model_layers_18_mlp_gate_proj_weight_palettized, x = input_65)[name = string("input_67")];
string up_states_9_pad_type_0 = const()[name = string("up_states_9_pad_type_0"), val = string("valid")];
tensor<int32, [2]> up_states_9_strides_0 = const()[name = string("up_states_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> up_states_9_pad_0 = const()[name = string("up_states_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> up_states_9_dilations_0 = const()[name = string("up_states_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 up_states_9_groups_0 = const()[name = string("up_states_9_groups_0"), val = int32(1)];
tensor<fp16, [1, 8192, 1, 64]> up_states_9 = conv(dilations = up_states_9_dilations_0, groups = up_states_9_groups_0, pad = up_states_9_pad_0, pad_type = up_states_9_pad_type_0, strides = up_states_9_strides_0, weight = model_model_layers_18_mlp_up_proj_weight_palettized, x = input_65)[name = string("up_states_9")];
tensor<fp16, [1, 8192, 1, 64]> gate_states_9 = silu(x = input_67)[name = string("gate_states_9")];
tensor<fp16, [1, 8192, 1, 64]> input_69 = mul(x = gate_states_9, y = up_states_9)[name = string("input_69")];
string hidden_states_39_pad_type_0 = const()[name = string("hidden_states_39_pad_type_0"), val = string("valid")];
tensor<int32, [2]> hidden_states_39_strides_0 = const()[name = string("hidden_states_39_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> hidden_states_39_pad_0 = const()[name = string("hidden_states_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> hidden_states_39_dilations_0 = const()[name = string("hidden_states_39_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 hidden_states_39_groups_0 = const()[name = string("hidden_states_39_groups_0"), val = int32(1)];
tensor<fp16, [1, 3072, 1, 64]> hidden_states_39 = conv(dilations = hidden_states_39_dilations_0, groups = hidden_states_39_groups_0, pad = hidden_states_39_pad_0, pad_type = hidden_states_39_pad_type_0, strides = hidden_states_39_strides_0, weight = model_model_layers_18_mlp_down_proj_weight_palettized, x = input_69)[name = string("hidden_states_39")];
tensor<int32, [1]> var_1230_axes_0 = const()[name = string("op_1230_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 64]> var_1230 = squeeze(axes = var_1230_axes_0, x = hidden_states_39)[name = string("op_1230")];
tensor<int32, [3]> var_1231 = const()[name = string("op_1231"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 64, 3072]> var_1232 = transpose(perm = var_1231, x = var_1230)[name = string("transpose_14")];
tensor<fp16, [1, 64, 3072]> hidden_states_41_cast_fp16 = add(x = hidden_states_37_cast_fp16, y = var_1232)[name = string("hidden_states_41_cast_fp16")];
tensor<int32, [1]> mean_21_axes_0 = const()[name = string("mean_21_axes_0"), val = tensor<int32, [1]>([-1])];
bool mean_21_keep_dims_0 = const()[name = string("mean_21_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 64, 1]> mean_21_cast_fp16 = reduce_mean(axes = mean_21_axes_0, keep_dims = mean_21_keep_dims_0, x = hidden_states_41_cast_fp16)[name = string("mean_21_cast_fp16")];
tensor<fp16, [1, 64, 3072]> input_71_cast_fp16 = sub(x = hidden_states_41_cast_fp16, y = mean_21_cast_fp16)[name = string("input_71_cast_fp16")];
tensor<int32, [1]> var_1240_axes_0 = const()[name = string("op_1240_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [3072]> model_model_layers_19_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_19_input_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410817280)))];
tensor<fp16, [1, 64, 3072]> var_1240_cast_fp16 = layer_norm(axes = var_1240_axes_0, epsilon = var_49_to_fp16, gamma = model_model_layers_19_input_layernorm_weight_to_fp16, x = input_71_cast_fp16)[name = string("op_1240_cast_fp16")];
tensor<int32, [3]> var_1244 = const()[name = string("op_1244"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [1]> var_1246_axes_0 = const()[name = string("op_1246_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 64]> var_1245 = transpose(perm = var_1244, x = var_1240_cast_fp16)[name = string("transpose_13")];
tensor<fp16, [1, 3072, 1, 64]> var_1246 = expand_dims(axes = var_1246_axes_0, x = var_1245)[name = string("op_1246")];
string query_states_21_pad_type_0 = const()[name = string("query_states_21_pad_type_0"), val = string("valid")];
tensor<int32, [2]> query_states_21_strides_0 = const()[name = string("query_states_21_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> query_states_21_pad_0 = const()[name = string("query_states_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> query_states_21_dilations_0 = const()[name = string("query_states_21_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 query_states_21_groups_0 = const()[name = string("query_states_21_groups_0"), val = int32(1)];
tensor<fp16, [1, 3072, 1, 64]> query_states_21 = conv(dilations = query_states_21_dilations_0, groups = query_states_21_groups_0, pad = query_states_21_pad_0, pad_type = query_states_21_pad_type_0, strides = query_states_21_strides_0, weight = model_model_layers_19_self_attn_q_proj_weight_palettized, x = var_1246)[name = string("query_states_21")];
string key_states_31_pad_type_0 = const()[name = string("key_states_31_pad_type_0"), val = string("valid")];
tensor<int32, [2]> key_states_31_strides_0 = const()[name = string("key_states_31_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> key_states_31_pad_0 = const()[name = string("key_states_31_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> key_states_31_dilations_0 = const()[name = string("key_states_31_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 key_states_31_groups_0 = const()[name = string("key_states_31_groups_0"), val = int32(1)];
tensor<fp16, [1, 1024, 1, 64]> key_states_31 = conv(dilations = key_states_31_dilations_0, groups = key_states_31_groups_0, pad = key_states_31_pad_0, pad_type = key_states_31_pad_type_0, strides = key_states_31_strides_0, weight = model_model_layers_19_self_attn_k_proj_weight_palettized, x = var_1246)[name = string("key_states_31")];
string value_states_31_pad_type_0 = const()[name = string("value_states_31_pad_type_0"), val = string("valid")];
tensor<int32, [2]> value_states_31_strides_0 = const()[name = string("value_states_31_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> value_states_31_pad_0 = const()[name = string("value_states_31_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> value_states_31_dilations_0 = const()[name = string("value_states_31_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 value_states_31_groups_0 = const()[name = string("value_states_31_groups_0"), val = int32(1)];
tensor<fp16, [1, 1024, 1, 64]> value_states_31 = conv(dilations = value_states_31_dilations_0, groups = value_states_31_groups_0, pad = value_states_31_pad_0, pad_type = value_states_31_pad_type_0, strides = value_states_31_strides_0, weight = model_model_layers_19_self_attn_v_proj_weight_palettized, x = var_1246)[name = string("value_states_31")];
tensor<int32, [4]> var_1266 = const()[name = string("op_1266"), val = tensor<int32, [4]>([1, 24, 128, 64])];
tensor<fp16, [1, 24, 128, 64]> var_1267 = reshape(shape = var_1266, x = query_states_21)[name = string("op_1267")];
tensor<int32, [4]> var_1268 = const()[name = string("op_1268"), val = tensor<int32, [4]>([0, 1, 3, 2])];
tensor<int32, [4]> var_1270 = const()[name = string("op_1270"), val = tensor<int32, [4]>([1, 8, 128, 64])];
tensor<fp16, [1, 8, 128, 64]> var_1271 = reshape(shape = var_1270, x = key_states_31)[name = string("op_1271")];
tensor<int32, [4]> var_1272 = const()[name = string("op_1272"), val = tensor<int32, [4]>([0, 1, 3, 2])];
tensor<int32, [4]> var_1274 = const()[name = string("op_1274"), val = tensor<int32, [4]>([1, 8, 128, 64])];
tensor<fp16, [1, 8, 128, 64]> var_1275 = reshape(shape = var_1274, x = value_states_31)[name = string("op_1275")];
tensor<int32, [4]> var_1276 = const()[name = string("op_1276"), val = tensor<int32, [4]>([0, 1, 3, 2])];
tensor<int32, [4]> x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor<int32, [4]>([1, 24, 64, 64])];
tensor<bool, [4]> x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 24, 64, 128]> x_141 = transpose(perm = var_1268, x = var_1267)[name = string("transpose_12")];
tensor<fp16, [1, 24, 64, 64]> x1_21 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = x_141)[name = string("x1_21")];
tensor<int32, [4]> x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor<int32, [4]>([1, 24, 64, 128])];
tensor<bool, [4]> x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 24, 64, 64]> x2_21 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = x_141)[name = string("x2_21")];
tensor<fp16, [1, 24, 64, 64]> var_1294 = mul(x = x1_21, y = cos_7)[name = string("op_1294")];
tensor<fp16, [1, 24, 64, 64]> var_1295 = mul(x = x2_21, y = sin_7)[name = string("op_1295")];
tensor<fp16, [1, 24, 64, 64]> var_1296 = sub(x = var_1294, y = var_1295)[name = string("op_1296")];
tensor<fp16, [1, 24, 64, 64]> var_1297 = mul(x = x2_21, y = cos_7)[name = string("op_1297")];
tensor<fp16, [1, 24, 64, 64]> var_1298 = mul(x = x1_21, y = sin_7)[name = string("op_1298")];
tensor<fp16, [1, 24, 64, 64]> var_1299 = add(x = var_1297, y = var_1298)[name = string("op_1299")];
bool rotated_21_interleave_0 = const()[name = string("rotated_21_interleave_0"), val = bool(false)];
tensor<fp16, [1, 24, 64, 128]> rotated_21 = concat(axis = var_47, interleave = rotated_21_interleave_0, values = (var_1296, var_1299))[name = string("rotated_21")];
tensor<int32, [4]> x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor<int32, [4]>([1, 8, 64, 64])];
tensor<bool, [4]> x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 8, 64, 128]> x_145 = transpose(perm = var_1272, x = var_1271)[name = string("transpose_11")];
tensor<fp16, [1, 8, 64, 64]> x1_23 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = x_145)[name = string("x1_23")];
tensor<int32, [4]> x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor<int32, [4]>([1, 8, 64, 128])];
tensor<bool, [4]> x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 8, 64, 64]> x2_23 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = x_145)[name = string("x2_23")];
tensor<fp16, [1, 8, 64, 64]> var_1315 = mul(x = x1_23, y = cos_7)[name = string("op_1315")];
tensor<fp16, [1, 8, 64, 64]> var_1316 = mul(x = x2_23, y = sin_7)[name = string("op_1316")];
tensor<fp16, [1, 8, 64, 64]> var_1317 = sub(x = var_1315, y = var_1316)[name = string("op_1317")];
tensor<fp16, [1, 8, 64, 64]> var_1318 = mul(x = x2_23, y = cos_7)[name = string("op_1318")];
tensor<fp16, [1, 8, 64, 64]> var_1319 = mul(x = x1_23, y = sin_7)[name = string("op_1319")];
tensor<fp16, [1, 8, 64, 64]> var_1320 = add(x = var_1318, y = var_1319)[name = string("op_1320")];
bool rotated_23_interleave_0 = const()[name = string("rotated_23_interleave_0"), val = bool(false)];
tensor<fp16, [1, 8, 64, 128]> rotated_23 = concat(axis = var_47, interleave = rotated_23_interleave_0, values = (var_1317, var_1320))[name = string("rotated_23")];
tensor<int32, [1]> expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor<int32, [1]>([19])];
tensor<int32, [1]> expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor<int32, [1]>([20])];
int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)];
bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (expand_dims_60, expand_dims_61, current_pos, expand_dims_63))[name = string("concat_92")];
tensor<int32, [1]> concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor<int32, [1]>([0])];
int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)];
bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (expand_dims_64, concat_93_values1_0, var_344, concat_93_values3_0))[name = string("concat_93")];
tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_92, begin_mask = model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0, end = concat_93, end_mask = model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_11_stride_0, update = rotated_23, x = coreml_update_state_23)[name = string("model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16")];
write_state(data = model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_24_write_state")];
tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_24 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_24")];
tensor<int32, [1]> expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor<int32, [1]>([47])];
tensor<int32, [1]> expand_dims_67 = const()[name = string("expand_dims_67"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_69 = const()[name = string("expand_dims_69"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_70 = const()[name = string("expand_dims_70"), val = tensor<int32, [1]>([48])];
int32 concat_96_axis_0 = const()[name = string("concat_96_axis_0"), val = int32(0)];
bool concat_96_interleave_0 = const()[name = string("concat_96_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_96 = concat(axis = concat_96_axis_0, interleave = concat_96_interleave_0, values = (expand_dims_66, expand_dims_67, current_pos, expand_dims_69))[name = string("concat_96")];
tensor<int32, [1]> concat_97_values1_0 = const()[name = string("concat_97_values1_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_97_values3_0 = const()[name = string("concat_97_values3_0"), val = tensor<int32, [1]>([0])];
int32 concat_97_axis_0 = const()[name = string("concat_97_axis_0"), val = int32(0)];
bool concat_97_interleave_0 = const()[name = string("concat_97_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_97 = concat(axis = concat_97_axis_0, interleave = concat_97_interleave_0, values = (expand_dims_70, concat_97_values1_0, var_344, concat_97_values3_0))[name = string("concat_97")];
tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<fp16, [1, 8, 64, 128]> value_states_33 = transpose(perm = var_1276, x = var_1275)[name = string("transpose_10")];
tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_96, begin_mask = model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0, end = concat_97, end_mask = model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_12_stride_0, update = value_states_33, x = coreml_update_state_24)[name = string("model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16")];
write_state(data = model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_25_write_state")];
tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_25 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_25")];
tensor<int32, [4]> var_1343_begin_0 = const()[name = string("op_1343_begin_0"), val = tensor<int32, [4]>([19, 0, 0, 0])];
tensor<int32, [4]> var_1343_end_0 = const()[name = string("op_1343_end_0"), val = tensor<int32, [4]>([20, 8, 1024, 128])];
tensor<bool, [4]> var_1343_end_mask_0 = const()[name = string("op_1343_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<fp16, [1, 8, 1024, 128]> var_1343_cast_fp16 = slice_by_index(begin = var_1343_begin_0, end = var_1343_end_0, end_mask = var_1343_end_mask_0, x = coreml_update_state_25)[name = string("op_1343_cast_fp16")];
tensor<int32, [1]> K_layer_cache_11_axes_0 = const()[name = string("K_layer_cache_11_axes_0"), val = tensor<int32, [1]>([0])];
tensor<fp16, [8, 1024, 128]> K_layer_cache_11_cast_fp16 = squeeze(axes = K_layer_cache_11_axes_0, x = var_1343_cast_fp16)[name = string("K_layer_cache_11_cast_fp16")];
tensor<int32, [4]> var_1345_begin_0 = const()[name = string("op_1345_begin_0"), val = tensor<int32, [4]>([47, 0, 0, 0])];
tensor<int32, [4]> var_1345_end_0 = const()[name = string("op_1345_end_0"), val = tensor<int32, [4]>([48, 8, 1024, 128])];
tensor<bool, [4]> var_1345_end_mask_0 = const()[name = string("op_1345_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<fp16, [1, 8, 1024, 128]> var_1345_cast_fp16 = slice_by_index(begin = var_1345_begin_0, end = var_1345_end_0, end_mask = var_1345_end_mask_0, x = coreml_update_state_25)[name = string("op_1345_cast_fp16")];
tensor<int32, [1]> V_layer_cache_11_axes_0 = const()[name = string("V_layer_cache_11_axes_0"), val = tensor<int32, [1]>([0])];
tensor<fp16, [8, 1024, 128]> V_layer_cache_11_cast_fp16 = squeeze(axes = V_layer_cache_11_axes_0, x = var_1345_cast_fp16)[name = string("V_layer_cache_11_cast_fp16")];
tensor<int32, [1]> x_151_axes_0 = const()[name = string("x_151_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [8, 1, 1024, 128]> x_151_cast_fp16 = expand_dims(axes = x_151_axes_0, x = K_layer_cache_11_cast_fp16)[name = string("x_151_cast_fp16")];
tensor<int32, [4]> var_1354 = const()[name = string("op_1354"), val = tensor<int32, [4]>([1, 3, 1, 1])];
tensor<fp16, [8, 3, 1024, 128]> x_153_cast_fp16 = tile(reps = var_1354, x = x_151_cast_fp16)[name = string("x_153_cast_fp16")];
tensor<int32, [4]> var_1358 = const()[name = string("op_1358"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
tensor<fp16, [1, 24, 1024, 128]> var_1359_cast_fp16 = reshape(shape = var_1358, x = x_153_cast_fp16)[name = string("op_1359_cast_fp16")];
tensor<int32, [1]> x_157_axes_0 = const()[name = string("x_157_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [8, 1, 1024, 128]> x_157_cast_fp16 = expand_dims(axes = x_157_axes_0, x = V_layer_cache_11_cast_fp16)[name = string("x_157_cast_fp16")];
tensor<int32, [4]> var_1361 = const()[name = string("op_1361"), val = tensor<int32, [4]>([1, 3, 1, 1])];
tensor<fp16, [8, 3, 1024, 128]> x_159_cast_fp16 = tile(reps = var_1361, x = x_157_cast_fp16)[name = string("x_159_cast_fp16")];
bool var_1368_transpose_x_0 = const()[name = string("op_1368_transpose_x_0"), val = bool(false)];
bool var_1368_transpose_y_0 = const()[name = string("op_1368_transpose_y_0"), val = bool(true)];
tensor<fp16, [1, 24, 64, 1024]> var_1368_cast_fp16 = matmul(transpose_x = var_1368_transpose_x_0, transpose_y = var_1368_transpose_y_0, x = rotated_21, y = var_1359_cast_fp16)[name = string("op_1368_cast_fp16")];
fp16 var_1369_to_fp16 = const()[name = string("op_1369_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 24, 64, 1024]> attn_weights_11_cast_fp16 = mul(x = var_1368_cast_fp16, y = var_1369_to_fp16)[name = string("attn_weights_11_cast_fp16")];
tensor<fp16, [1, 24, 64, 1024]> x_161_cast_fp16 = add(x = attn_weights_11_cast_fp16, y = causal_mask)[name = string("x_161_cast_fp16")];
tensor<int32, [1]> reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor<int32, [1]>([-1])];
bool reduce_max_5_keep_dims_0 = const()[name = string("reduce_max_5_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 24, 64, 1]> reduce_max_5_cast_fp16 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = x_161_cast_fp16)[name = string("reduce_max_5_cast_fp16")];
tensor<fp16, [1, 24, 64, 1024]> x_163_cast_fp16 = sub(x = x_161_cast_fp16, y = reduce_max_5_cast_fp16)[name = string("x_163_cast_fp16")];
tensor<fp16, [1, 24, 64, 1024]> exp_x_11_cast_fp16 = exp(x = x_163_cast_fp16)[name = string("exp_x_11_cast_fp16")];
tensor<int32, [1]> var_1380_axes_0 = const()[name = string("op_1380_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_1380_keep_dims_0 = const()[name = string("op_1380_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 24, 64, 1]> var_1380_cast_fp16 = reduce_sum(axes = var_1380_axes_0, keep_dims = var_1380_keep_dims_0, x = exp_x_11_cast_fp16)[name = string("op_1380_cast_fp16")];
tensor<fp16, [1, 24, 64, 1024]> var_1381_cast_fp16 = real_div(x = exp_x_11_cast_fp16, y = var_1380_cast_fp16)[name = string("op_1381_cast_fp16")];
tensor<int32, [3]> concat_102 = const()[name = string("concat_102"), val = tensor<int32, [3]>([24, 64, 1024])];
tensor<fp16, [24, 64, 1024]> reshape_15_cast_fp16 = reshape(shape = concat_102, x = var_1381_cast_fp16)[name = string("reshape_15_cast_fp16")];
tensor<int32, [3]> concat_103 = const()[name = string("concat_103"), val = tensor<int32, [3]>([24, 1024, 128])];
tensor<fp16, [24, 1024, 128]> reshape_16_cast_fp16 = reshape(shape = concat_103, x = x_159_cast_fp16)[name = string("reshape_16_cast_fp16")];
bool matmul_5_transpose_x_0 = const()[name = string("matmul_5_transpose_x_0"), val = bool(false)];
bool matmul_5_transpose_y_0 = const()[name = string("matmul_5_transpose_y_0"), val = bool(false)];
tensor<fp16, [24, 64, 128]> matmul_5_cast_fp16 = matmul(transpose_x = matmul_5_transpose_x_0, transpose_y = matmul_5_transpose_y_0, x = reshape_15_cast_fp16, y = reshape_16_cast_fp16)[name = string("matmul_5_cast_fp16")];
tensor<int32, [4]> concat_107 = const()[name = string("concat_107"), val = tensor<int32, [4]>([1, 24, 64, 128])];
tensor<fp16, [1, 24, 64, 128]> reshape_17_cast_fp16 = reshape(shape = concat_107, x = matmul_5_cast_fp16)[name = string("reshape_17_cast_fp16")];
tensor<int32, [4]> var_1384_perm_0 = const()[name = string("op_1384_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1386 = const()[name = string("op_1386"), val = tensor<int32, [3]>([1, 64, 3072])];
tensor<fp16, [1, 64, 24, 128]> var_1384_cast_fp16 = transpose(perm = var_1384_perm_0, x = reshape_17_cast_fp16)[name = string("transpose_9")];
tensor<fp16, [1, 64, 3072]> input_75_cast_fp16 = reshape(shape = var_1386, x = var_1384_cast_fp16)[name = string("input_75_cast_fp16")];
tensor<fp16, [3072, 3072]> model_model_layers_19_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410823488))), lut = tensor<fp16, [384, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415542144))))[name = string("model_model_layers_19_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
tensor<fp16, [1, 64, 3072]> linear_5_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_19_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("linear_5_cast_fp16")];
tensor<fp16, [1, 64, 3072]> hidden_states_45_cast_fp16 = add(x = hidden_states_41_cast_fp16, y = linear_5_cast_fp16)[name = string("hidden_states_45_cast_fp16")];
tensor<int32, [1]> mean_23_axes_0 = const()[name = string("mean_23_axes_0"), val = tensor<int32, [1]>([-1])];
bool mean_23_keep_dims_0 = const()[name = string("mean_23_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 64, 1]> mean_23_cast_fp16 = reduce_mean(axes = mean_23_axes_0, keep_dims = mean_23_keep_dims_0, x = hidden_states_45_cast_fp16)[name = string("mean_23_cast_fp16")];
tensor<fp16, [1, 64, 3072]> input_77_cast_fp16 = sub(x = hidden_states_45_cast_fp16, y = mean_23_cast_fp16)[name = string("input_77_cast_fp16")];
tensor<int32, [1]> var_1397_axes_0 = const()[name = string("op_1397_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [3072]> model_model_layers_19_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_19_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415554496)))];
tensor<fp16, [1, 64, 3072]> var_1397_cast_fp16 = layer_norm(axes = var_1397_axes_0, epsilon = var_49_to_fp16, gamma = model_model_layers_19_post_attention_layernorm_weight_to_fp16, x = input_77_cast_fp16)[name = string("op_1397_cast_fp16")];
tensor<int32, [3]> var_1404 = const()[name = string("op_1404"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [1]> input_79_axes_0 = const()[name = string("input_79_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 64]> var_1405 = transpose(perm = var_1404, x = var_1397_cast_fp16)[name = string("transpose_8")];
tensor<fp16, [1, 3072, 1, 64]> input_79 = expand_dims(axes = input_79_axes_0, x = var_1405)[name = string("input_79")];
string input_81_pad_type_0 = const()[name = string("input_81_pad_type_0"), val = string("valid")];
tensor<int32, [2]> input_81_strides_0 = const()[name = string("input_81_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> input_81_pad_0 = const()[name = string("input_81_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> input_81_dilations_0 = const()[name = string("input_81_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 input_81_groups_0 = const()[name = string("input_81_groups_0"), val = int32(1)];
tensor<fp16, [1, 8192, 1, 64]> input_81 = conv(dilations = input_81_dilations_0, groups = input_81_groups_0, pad = input_81_pad_0, pad_type = input_81_pad_type_0, strides = input_81_strides_0, weight = model_model_layers_19_mlp_gate_proj_weight_palettized, x = input_79)[name = string("input_81")];
string up_states_11_pad_type_0 = const()[name = string("up_states_11_pad_type_0"), val = string("valid")];
tensor<int32, [2]> up_states_11_strides_0 = const()[name = string("up_states_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> up_states_11_pad_0 = const()[name = string("up_states_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> up_states_11_dilations_0 = const()[name = string("up_states_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 up_states_11_groups_0 = const()[name = string("up_states_11_groups_0"), val = int32(1)];
tensor<fp16, [1, 8192, 1, 64]> up_states_11 = conv(dilations = up_states_11_dilations_0, groups = up_states_11_groups_0, pad = up_states_11_pad_0, pad_type = up_states_11_pad_type_0, strides = up_states_11_strides_0, weight = model_model_layers_19_mlp_up_proj_weight_palettized, x = input_79)[name = string("up_states_11")];
tensor<fp16, [1, 8192, 1, 64]> gate_states_11 = silu(x = input_81)[name = string("gate_states_11")];
tensor<fp16, [1, 8192, 1, 64]> input_83 = mul(x = gate_states_11, y = up_states_11)[name = string("input_83")];
string hidden_states_47_pad_type_0 = const()[name = string("hidden_states_47_pad_type_0"), val = string("valid")];
tensor<int32, [2]> hidden_states_47_strides_0 = const()[name = string("hidden_states_47_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> hidden_states_47_pad_0 = const()[name = string("hidden_states_47_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> hidden_states_47_dilations_0 = const()[name = string("hidden_states_47_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 hidden_states_47_groups_0 = const()[name = string("hidden_states_47_groups_0"), val = int32(1)];
tensor<fp16, [1, 3072, 1, 64]> hidden_states_47 = conv(dilations = hidden_states_47_dilations_0, groups = hidden_states_47_groups_0, pad = hidden_states_47_pad_0, pad_type = hidden_states_47_pad_type_0, strides = hidden_states_47_strides_0, weight = model_model_layers_19_mlp_down_proj_weight_palettized, x = input_83)[name = string("hidden_states_47")];
tensor<int32, [1]> var_1427_axes_0 = const()[name = string("op_1427_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 64]> var_1427 = squeeze(axes = var_1427_axes_0, x = hidden_states_47)[name = string("op_1427")];
tensor<int32, [3]> var_1428 = const()[name = string("op_1428"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 64, 3072]> var_1429 = transpose(perm = var_1428, x = var_1427)[name = string("transpose_7")];
tensor<fp16, [1, 64, 3072]> hidden_states_49_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = var_1429)[name = string("hidden_states_49_cast_fp16")];
tensor<int32, [1]> mean_25_axes_0 = const()[name = string("mean_25_axes_0"), val = tensor<int32, [1]>([-1])];
bool mean_25_keep_dims_0 = const()[name = string("mean_25_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 64, 1]> mean_25_cast_fp16 = reduce_mean(axes = mean_25_axes_0, keep_dims = mean_25_keep_dims_0, x = hidden_states_49_cast_fp16)[name = string("mean_25_cast_fp16")];
tensor<fp16, [1, 64, 3072]> input_85_cast_fp16 = sub(x = hidden_states_49_cast_fp16, y = mean_25_cast_fp16)[name = string("input_85_cast_fp16")];
tensor<int32, [1]> var_1437_axes_0 = const()[name = string("op_1437_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [3072]> model_model_layers_20_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_20_input_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415560704)))];
tensor<fp16, [1, 64, 3072]> var_1437_cast_fp16 = layer_norm(axes = var_1437_axes_0, epsilon = var_49_to_fp16, gamma = model_model_layers_20_input_layernorm_weight_to_fp16, x = input_85_cast_fp16)[name = string("op_1437_cast_fp16")];
tensor<int32, [3]> var_1441 = const()[name = string("op_1441"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [1]> var_1443_axes_0 = const()[name = string("op_1443_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 64]> var_1442 = transpose(perm = var_1441, x = var_1437_cast_fp16)[name = string("transpose_6")];
tensor<fp16, [1, 3072, 1, 64]> var_1443 = expand_dims(axes = var_1443_axes_0, x = var_1442)[name = string("op_1443")];
string query_states_25_pad_type_0 = const()[name = string("query_states_25_pad_type_0"), val = string("valid")];
tensor<int32, [2]> query_states_25_strides_0 = const()[name = string("query_states_25_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> query_states_25_pad_0 = const()[name = string("query_states_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> query_states_25_dilations_0 = const()[name = string("query_states_25_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 query_states_25_groups_0 = const()[name = string("query_states_25_groups_0"), val = int32(1)];
tensor<fp16, [1, 3072, 1, 64]> query_states_25 = conv(dilations = query_states_25_dilations_0, groups = query_states_25_groups_0, pad = query_states_25_pad_0, pad_type = query_states_25_pad_type_0, strides = query_states_25_strides_0, weight = model_model_layers_20_self_attn_q_proj_weight_palettized, x = var_1443)[name = string("query_states_25")];
string key_states_37_pad_type_0 = const()[name = string("key_states_37_pad_type_0"), val = string("valid")];
tensor<int32, [2]> key_states_37_strides_0 = const()[name = string("key_states_37_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> key_states_37_pad_0 = const()[name = string("key_states_37_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> key_states_37_dilations_0 = const()[name = string("key_states_37_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 key_states_37_groups_0 = const()[name = string("key_states_37_groups_0"), val = int32(1)];
tensor<fp16, [1, 1024, 1, 64]> key_states_37 = conv(dilations = key_states_37_dilations_0, groups = key_states_37_groups_0, pad = key_states_37_pad_0, pad_type = key_states_37_pad_type_0, strides = key_states_37_strides_0, weight = model_model_layers_20_self_attn_k_proj_weight_palettized, x = var_1443)[name = string("key_states_37")];
string value_states_37_pad_type_0 = const()[name = string("value_states_37_pad_type_0"), val = string("valid")];
tensor<int32, [2]> value_states_37_strides_0 = const()[name = string("value_states_37_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> value_states_37_pad_0 = const()[name = string("value_states_37_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> value_states_37_dilations_0 = const()[name = string("value_states_37_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 value_states_37_groups_0 = const()[name = string("value_states_37_groups_0"), val = int32(1)];
tensor<fp16, [1, 1024, 1, 64]> value_states_37 = conv(dilations = value_states_37_dilations_0, groups = value_states_37_groups_0, pad = value_states_37_pad_0, pad_type = value_states_37_pad_type_0, strides = value_states_37_strides_0, weight = model_model_layers_20_self_attn_v_proj_weight_palettized, x = var_1443)[name = string("value_states_37")];
tensor<int32, [4]> var_1463 = const()[name = string("op_1463"), val = tensor<int32, [4]>([1, 24, 128, 64])];
tensor<fp16, [1, 24, 128, 64]> var_1464 = reshape(shape = var_1463, x = query_states_25)[name = string("op_1464")];
tensor<int32, [4]> var_1465 = const()[name = string("op_1465"), val = tensor<int32, [4]>([0, 1, 3, 2])];
tensor<int32, [4]> var_1467 = const()[name = string("op_1467"), val = tensor<int32, [4]>([1, 8, 128, 64])];
tensor<fp16, [1, 8, 128, 64]> var_1468 = reshape(shape = var_1467, x = key_states_37)[name = string("op_1468")];
tensor<int32, [4]> var_1469 = const()[name = string("op_1469"), val = tensor<int32, [4]>([0, 1, 3, 2])];
tensor<int32, [4]> var_1471 = const()[name = string("op_1471"), val = tensor<int32, [4]>([1, 8, 128, 64])];
tensor<fp16, [1, 8, 128, 64]> var_1472 = reshape(shape = var_1471, x = value_states_37)[name = string("op_1472")];
tensor<int32, [4]> var_1473 = const()[name = string("op_1473"), val = tensor<int32, [4]>([0, 1, 3, 2])];
tensor<int32, [4]> x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor<int32, [4]>([1, 24, 64, 64])];
tensor<bool, [4]> x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 24, 64, 128]> x_169 = transpose(perm = var_1465, x = var_1464)[name = string("transpose_5")];
tensor<fp16, [1, 24, 64, 64]> x1_25 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = x_169)[name = string("x1_25")];
tensor<int32, [4]> x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor<int32, [4]>([1, 24, 64, 128])];
tensor<bool, [4]> x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 24, 64, 64]> x2_25 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = x_169)[name = string("x2_25")];
tensor<fp16, [1, 24, 64, 64]> var_1491 = mul(x = x1_25, y = cos_7)[name = string("op_1491")];
tensor<fp16, [1, 24, 64, 64]> var_1492 = mul(x = x2_25, y = sin_7)[name = string("op_1492")];
tensor<fp16, [1, 24, 64, 64]> var_1493 = sub(x = var_1491, y = var_1492)[name = string("op_1493")];
tensor<fp16, [1, 24, 64, 64]> var_1494 = mul(x = x2_25, y = cos_7)[name = string("op_1494")];
tensor<fp16, [1, 24, 64, 64]> var_1495 = mul(x = x1_25, y = sin_7)[name = string("op_1495")];
tensor<fp16, [1, 24, 64, 64]> var_1496 = add(x = var_1494, y = var_1495)[name = string("op_1496")];
bool rotated_25_interleave_0 = const()[name = string("rotated_25_interleave_0"), val = bool(false)];
tensor<fp16, [1, 24, 64, 128]> rotated_25 = concat(axis = var_47, interleave = rotated_25_interleave_0, values = (var_1493, var_1496))[name = string("rotated_25")];
tensor<int32, [4]> x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [4]> x1_end_0 = const()[name = string("x1_end_0"), val = tensor<int32, [4]>([1, 8, 64, 64])];
tensor<bool, [4]> x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
tensor<fp16, [1, 8, 64, 128]> x_173 = transpose(perm = var_1469, x = var_1468)[name = string("transpose_4")];
tensor<fp16, [1, 8, 64, 64]> x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = x_173)[name = string("x1")];
tensor<int32, [4]> x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
tensor<int32, [4]> x2_end_0 = const()[name = string("x2_end_0"), val = tensor<int32, [4]>([1, 8, 64, 128])];
tensor<bool, [4]> x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
tensor<fp16, [1, 8, 64, 64]> x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = x_173)[name = string("x2")];
tensor<fp16, [1, 8, 64, 64]> var_1512 = mul(x = x1, y = cos_7)[name = string("op_1512")];
tensor<fp16, [1, 8, 64, 64]> var_1513 = mul(x = x2, y = sin_7)[name = string("op_1513")];
tensor<fp16, [1, 8, 64, 64]> var_1514 = sub(x = var_1512, y = var_1513)[name = string("op_1514")];
tensor<fp16, [1, 8, 64, 64]> var_1515 = mul(x = x2, y = cos_7)[name = string("op_1515")];
tensor<fp16, [1, 8, 64, 64]> var_1516 = mul(x = x1, y = sin_7)[name = string("op_1516")];
tensor<fp16, [1, 8, 64, 64]> var_1517 = add(x = var_1515, y = var_1516)[name = string("op_1517")];
bool rotated_interleave_0 = const()[name = string("rotated_interleave_0"), val = bool(false)];
tensor<fp16, [1, 8, 64, 128]> rotated = concat(axis = var_47, interleave = rotated_interleave_0, values = (var_1514, var_1517))[name = string("rotated")];
tensor<int32, [1]> expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor<int32, [1]>([20])];
tensor<int32, [1]> expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_76 = const()[name = string("expand_dims_76"), val = tensor<int32, [1]>([21])];
int32 concat_110_axis_0 = const()[name = string("concat_110_axis_0"), val = int32(0)];
bool concat_110_interleave_0 = const()[name = string("concat_110_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_110 = concat(axis = concat_110_axis_0, interleave = concat_110_interleave_0, values = (expand_dims_72, expand_dims_73, current_pos, expand_dims_75))[name = string("concat_110")];
tensor<int32, [1]> concat_111_values1_0 = const()[name = string("concat_111_values1_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_111_values3_0 = const()[name = string("concat_111_values3_0"), val = tensor<int32, [1]>([0])];
int32 concat_111_axis_0 = const()[name = string("concat_111_axis_0"), val = int32(0)];
bool concat_111_interleave_0 = const()[name = string("concat_111_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_111 = concat(axis = concat_111_axis_0, interleave = concat_111_interleave_0, values = (expand_dims_76, concat_111_values1_0, var_344, concat_111_values3_0))[name = string("concat_111")];
tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_110, begin_mask = model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0, end = concat_111, end_mask = model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_13_stride_0, update = rotated, x = coreml_update_state_25)[name = string("model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16")];
write_state(data = model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_26_write_state")];
tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_26 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_26")];
tensor<int32, [1]> expand_dims_78 = const()[name = string("expand_dims_78"), val = tensor<int32, [1]>([48])];
tensor<int32, [1]> expand_dims_79 = const()[name = string("expand_dims_79"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor<int32, [1]>([49])];
int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)];
bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (expand_dims_78, expand_dims_79, current_pos, expand_dims_81))[name = string("concat_114")];
tensor<int32, [1]> concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor<int32, [1]>([0])];
tensor<int32, [1]> concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor<int32, [1]>([0])];
int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)];
bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)];
tensor<int32, [4]> concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (expand_dims_82, concat_115_values1_0, var_344, concat_115_values3_0))[name = string("concat_115")];
tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
tensor<fp16, [1, 8, 64, 128]> value_states_39 = transpose(perm = var_1473, x = var_1472)[name = string("transpose_3")];
tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_114, begin_mask = model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0, end = concat_115, end_mask = model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_14_stride_0, update = value_states_39, x = coreml_update_state_26)[name = string("model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16")];
write_state(data = model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_27_write_state")];
tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_27 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_27")];
tensor<int32, [4]> var_1540_begin_0 = const()[name = string("op_1540_begin_0"), val = tensor<int32, [4]>([20, 0, 0, 0])];
tensor<int32, [4]> var_1540_end_0 = const()[name = string("op_1540_end_0"), val = tensor<int32, [4]>([21, 8, 1024, 128])];
tensor<bool, [4]> var_1540_end_mask_0 = const()[name = string("op_1540_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<fp16, [1, 8, 1024, 128]> var_1540_cast_fp16 = slice_by_index(begin = var_1540_begin_0, end = var_1540_end_0, end_mask = var_1540_end_mask_0, x = coreml_update_state_27)[name = string("op_1540_cast_fp16")];
tensor<int32, [1]> K_layer_cache_axes_0 = const()[name = string("K_layer_cache_axes_0"), val = tensor<int32, [1]>([0])];
tensor<fp16, [8, 1024, 128]> K_layer_cache_cast_fp16 = squeeze(axes = K_layer_cache_axes_0, x = var_1540_cast_fp16)[name = string("K_layer_cache_cast_fp16")];
tensor<int32, [4]> var_1542_begin_0 = const()[name = string("op_1542_begin_0"), val = tensor<int32, [4]>([48, 0, 0, 0])];
tensor<int32, [4]> var_1542_end_0 = const()[name = string("op_1542_end_0"), val = tensor<int32, [4]>([49, 8, 1024, 128])];
tensor<bool, [4]> var_1542_end_mask_0 = const()[name = string("op_1542_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
tensor<fp16, [1, 8, 1024, 128]> var_1542_cast_fp16 = slice_by_index(begin = var_1542_begin_0, end = var_1542_end_0, end_mask = var_1542_end_mask_0, x = coreml_update_state_27)[name = string("op_1542_cast_fp16")];
tensor<int32, [1]> V_layer_cache_axes_0 = const()[name = string("V_layer_cache_axes_0"), val = tensor<int32, [1]>([0])];
tensor<fp16, [8, 1024, 128]> V_layer_cache_cast_fp16 = squeeze(axes = V_layer_cache_axes_0, x = var_1542_cast_fp16)[name = string("V_layer_cache_cast_fp16")];
tensor<int32, [1]> x_179_axes_0 = const()[name = string("x_179_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [8, 1, 1024, 128]> x_179_cast_fp16 = expand_dims(axes = x_179_axes_0, x = K_layer_cache_cast_fp16)[name = string("x_179_cast_fp16")];
tensor<int32, [4]> var_1551 = const()[name = string("op_1551"), val = tensor<int32, [4]>([1, 3, 1, 1])];
tensor<fp16, [8, 3, 1024, 128]> x_181_cast_fp16 = tile(reps = var_1551, x = x_179_cast_fp16)[name = string("x_181_cast_fp16")];
tensor<int32, [4]> var_1555 = const()[name = string("op_1555"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
tensor<fp16, [1, 24, 1024, 128]> var_1556_cast_fp16 = reshape(shape = var_1555, x = x_181_cast_fp16)[name = string("op_1556_cast_fp16")];
tensor<int32, [1]> x_185_axes_0 = const()[name = string("x_185_axes_0"), val = tensor<int32, [1]>([1])];
tensor<fp16, [8, 1, 1024, 128]> x_185_cast_fp16 = expand_dims(axes = x_185_axes_0, x = V_layer_cache_cast_fp16)[name = string("x_185_cast_fp16")];
tensor<int32, [4]> var_1558 = const()[name = string("op_1558"), val = tensor<int32, [4]>([1, 3, 1, 1])];
tensor<fp16, [8, 3, 1024, 128]> x_187_cast_fp16 = tile(reps = var_1558, x = x_185_cast_fp16)[name = string("x_187_cast_fp16")];
bool var_1565_transpose_x_0 = const()[name = string("op_1565_transpose_x_0"), val = bool(false)];
bool var_1565_transpose_y_0 = const()[name = string("op_1565_transpose_y_0"), val = bool(true)];
tensor<fp16, [1, 24, 64, 1024]> var_1565_cast_fp16 = matmul(transpose_x = var_1565_transpose_x_0, transpose_y = var_1565_transpose_y_0, x = rotated_25, y = var_1556_cast_fp16)[name = string("op_1565_cast_fp16")];
fp16 var_1566_to_fp16 = const()[name = string("op_1566_to_fp16"), val = fp16(0x1.6ap-4)];
tensor<fp16, [1, 24, 64, 1024]> attn_weights_cast_fp16 = mul(x = var_1565_cast_fp16, y = var_1566_to_fp16)[name = string("attn_weights_cast_fp16")];
tensor<fp16, [1, 24, 64, 1024]> x_189_cast_fp16 = add(x = attn_weights_cast_fp16, y = causal_mask)[name = string("x_189_cast_fp16")];
tensor<int32, [1]> reduce_max_6_axes_0 = const()[name = string("reduce_max_6_axes_0"), val = tensor<int32, [1]>([-1])];
bool reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 24, 64, 1]> reduce_max_6_cast_fp16 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = x_189_cast_fp16)[name = string("reduce_max_6_cast_fp16")];
tensor<fp16, [1, 24, 64, 1024]> x_191_cast_fp16 = sub(x = x_189_cast_fp16, y = reduce_max_6_cast_fp16)[name = string("x_191_cast_fp16")];
tensor<fp16, [1, 24, 64, 1024]> exp_x_cast_fp16 = exp(x = x_191_cast_fp16)[name = string("exp_x_cast_fp16")];
tensor<int32, [1]> var_1577_axes_0 = const()[name = string("op_1577_axes_0"), val = tensor<int32, [1]>([-1])];
bool var_1577_keep_dims_0 = const()[name = string("op_1577_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 24, 64, 1]> var_1577_cast_fp16 = reduce_sum(axes = var_1577_axes_0, keep_dims = var_1577_keep_dims_0, x = exp_x_cast_fp16)[name = string("op_1577_cast_fp16")];
tensor<fp16, [1, 24, 64, 1024]> var_1578_cast_fp16 = real_div(x = exp_x_cast_fp16, y = var_1577_cast_fp16)[name = string("op_1578_cast_fp16")];
tensor<int32, [3]> concat_120 = const()[name = string("concat_120"), val = tensor<int32, [3]>([24, 64, 1024])];
tensor<fp16, [24, 64, 1024]> reshape_18_cast_fp16 = reshape(shape = concat_120, x = var_1578_cast_fp16)[name = string("reshape_18_cast_fp16")];
tensor<int32, [3]> concat_121 = const()[name = string("concat_121"), val = tensor<int32, [3]>([24, 1024, 128])];
tensor<fp16, [24, 1024, 128]> reshape_19_cast_fp16 = reshape(shape = concat_121, x = x_187_cast_fp16)[name = string("reshape_19_cast_fp16")];
bool matmul_6_transpose_x_0 = const()[name = string("matmul_6_transpose_x_0"), val = bool(false)];
bool matmul_6_transpose_y_0 = const()[name = string("matmul_6_transpose_y_0"), val = bool(false)];
tensor<fp16, [24, 64, 128]> matmul_6_cast_fp16 = matmul(transpose_x = matmul_6_transpose_x_0, transpose_y = matmul_6_transpose_y_0, x = reshape_18_cast_fp16, y = reshape_19_cast_fp16)[name = string("matmul_6_cast_fp16")];
tensor<int32, [4]> concat_125 = const()[name = string("concat_125"), val = tensor<int32, [4]>([1, 24, 64, 128])];
tensor<fp16, [1, 24, 64, 128]> reshape_20_cast_fp16 = reshape(shape = concat_125, x = matmul_6_cast_fp16)[name = string("reshape_20_cast_fp16")];
tensor<int32, [4]> var_1581_perm_0 = const()[name = string("op_1581_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
tensor<int32, [3]> var_1583 = const()[name = string("op_1583"), val = tensor<int32, [3]>([1, 64, 3072])];
tensor<fp16, [1, 64, 24, 128]> var_1581_cast_fp16 = transpose(perm = var_1581_perm_0, x = reshape_20_cast_fp16)[name = string("transpose_2")];
tensor<fp16, [1, 64, 3072]> input_89_cast_fp16 = reshape(shape = var_1583, x = var_1581_cast_fp16)[name = string("input_89_cast_fp16")];
tensor<fp16, [3072, 3072]> model_model_layers_20_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415566912))), lut = tensor<fp16, [384, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(420285568))))[name = string("model_model_layers_20_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
tensor<fp16, [1, 64, 3072]> linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_20_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_89_cast_fp16)[name = string("linear_6_cast_fp16")];
tensor<fp16, [1, 64, 3072]> hidden_states_53_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = linear_6_cast_fp16)[name = string("hidden_states_53_cast_fp16")];
tensor<int32, [1]> mean_axes_0 = const()[name = string("mean_axes_0"), val = tensor<int32, [1]>([-1])];
bool mean_keep_dims_0 = const()[name = string("mean_keep_dims_0"), val = bool(true)];
tensor<fp16, [1, 64, 1]> mean_cast_fp16 = reduce_mean(axes = mean_axes_0, keep_dims = mean_keep_dims_0, x = hidden_states_53_cast_fp16)[name = string("mean_cast_fp16")];
tensor<fp16, [1, 64, 3072]> input_91_cast_fp16 = sub(x = hidden_states_53_cast_fp16, y = mean_cast_fp16)[name = string("input_91_cast_fp16")];
tensor<int32, [1]> var_1594_axes_0 = const()[name = string("op_1594_axes_0"), val = tensor<int32, [1]>([-1])];
tensor<fp16, [3072]> model_model_layers_20_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_20_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(420297920)))];
tensor<fp16, [1, 64, 3072]> var_1594_cast_fp16 = layer_norm(axes = var_1594_axes_0, epsilon = var_49_to_fp16, gamma = model_model_layers_20_post_attention_layernorm_weight_to_fp16, x = input_91_cast_fp16)[name = string("op_1594_cast_fp16")];
tensor<int32, [3]> var_1601 = const()[name = string("op_1601"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<int32, [1]> input_93_axes_0 = const()[name = string("input_93_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 64]> var_1602 = transpose(perm = var_1601, x = var_1594_cast_fp16)[name = string("transpose_1")];
tensor<fp16, [1, 3072, 1, 64]> input_93 = expand_dims(axes = input_93_axes_0, x = var_1602)[name = string("input_93")];
string input_95_pad_type_0 = const()[name = string("input_95_pad_type_0"), val = string("valid")];
tensor<int32, [2]> input_95_strides_0 = const()[name = string("input_95_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> input_95_pad_0 = const()[name = string("input_95_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> input_95_dilations_0 = const()[name = string("input_95_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 input_95_groups_0 = const()[name = string("input_95_groups_0"), val = int32(1)];
tensor<fp16, [1, 8192, 1, 64]> input_95 = conv(dilations = input_95_dilations_0, groups = input_95_groups_0, pad = input_95_pad_0, pad_type = input_95_pad_type_0, strides = input_95_strides_0, weight = model_model_layers_20_mlp_gate_proj_weight_palettized, x = input_93)[name = string("input_95")];
string up_states_pad_type_0 = const()[name = string("up_states_pad_type_0"), val = string("valid")];
tensor<int32, [2]> up_states_strides_0 = const()[name = string("up_states_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> up_states_pad_0 = const()[name = string("up_states_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> up_states_dilations_0 = const()[name = string("up_states_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 up_states_groups_0 = const()[name = string("up_states_groups_0"), val = int32(1)];
tensor<fp16, [1, 8192, 1, 64]> up_states = conv(dilations = up_states_dilations_0, groups = up_states_groups_0, pad = up_states_pad_0, pad_type = up_states_pad_type_0, strides = up_states_strides_0, weight = model_model_layers_20_mlp_up_proj_weight_palettized, x = input_93)[name = string("up_states")];
tensor<fp16, [1, 8192, 1, 64]> gate_states = silu(x = input_95)[name = string("gate_states")];
tensor<fp16, [1, 8192, 1, 64]> input = mul(x = gate_states, y = up_states)[name = string("input")];
string hidden_states_pad_type_0 = const()[name = string("hidden_states_pad_type_0"), val = string("valid")];
tensor<int32, [2]> hidden_states_strides_0 = const()[name = string("hidden_states_strides_0"), val = tensor<int32, [2]>([1, 1])];
tensor<int32, [4]> hidden_states_pad_0 = const()[name = string("hidden_states_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
tensor<int32, [2]> hidden_states_dilations_0 = const()[name = string("hidden_states_dilations_0"), val = tensor<int32, [2]>([1, 1])];
int32 hidden_states_groups_0 = const()[name = string("hidden_states_groups_0"), val = int32(1)];
tensor<fp16, [1, 3072, 1, 64]> hidden_states_1 = conv(dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = model_model_layers_20_mlp_down_proj_weight_palettized, x = input)[name = string("hidden_states")];
tensor<int32, [1]> var_1624_axes_0 = const()[name = string("op_1624_axes_0"), val = tensor<int32, [1]>([2])];
tensor<fp16, [1, 3072, 64]> var_1624 = squeeze(axes = var_1624_axes_0, x = hidden_states_1)[name = string("op_1624")];
tensor<int32, [3]> var_1625 = const()[name = string("op_1625"), val = tensor<int32, [3]>([0, 2, 1])];
tensor<fp16, [1, 64, 3072]> var_1626 = transpose(perm = var_1625, x = var_1624)[name = string("transpose_0")];
tensor<fp16, [1, 64, 3072]> output_hidden_states = add(x = hidden_states_53_cast_fp16, y = var_1626)[name = string("op_1627_cast_fp16")];
} -> (output_hidden_states);
}