File size: 473,102 Bytes

d1b7e2d

program(1.3)
[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3404.16.1"}, {"coremlc-version", "3404.23.1"}})]
{
    func infer<ios18>(tensor<fp16, [1, 1, 1, 1024]> causal_mask, tensor<int32, [1]> current_pos, tensor<fp16, [1, 1, 3072]> hidden_states, state<tensor<fp16, [56, 8, 1024, 128]>> model_model_kv_cache_0, tensor<int32, [1]> position_ids) {
            tensor<fp16, [3072, 3072, 1, 1]> model_model_layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor<fp16, [384, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9437312))))[name = string("model_model_layers_7_self_attn_q_proj_weight_palettized")];
            tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_7_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9633984))), lut = tensor<fp16, [128, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12779776))))[name = string("model_model_layers_7_self_attn_k_proj_weight_palettized")];
            tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_7_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12845376))), lut = tensor<fp16, [128, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15991168))))[name = string("model_model_layers_7_self_attn_v_proj_weight_palettized")];
            tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_7_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16056768))), lut = tensor<fp16, [1024, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41222656))))[name = string("model_model_layers_7_mlp_gate_proj_weight_palettized")];
            tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_7_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41747008))), lut = tensor<fp16, [1024, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66912896))))[name = string("model_model_layers_7_mlp_up_proj_weight_palettized")];
            tensor<fp16, [3072, 8192, 1, 1]> model_model_layers_7_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 8192, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67437248))), lut = tensor<fp16, [384, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92603136))))[name = string("model_model_layers_7_mlp_down_proj_weight_palettized")];
            tensor<fp16, [3072, 3072, 1, 1]> model_model_layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92799808))), lut = tensor<fp16, [384, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102237056))))[name = string("model_model_layers_8_self_attn_q_proj_weight_palettized")];
            tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_8_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102433728))), lut = tensor<fp16, [128, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105579520))))[name = string("model_model_layers_8_self_attn_k_proj_weight_palettized")];
            tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_8_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105645120))), lut = tensor<fp16, [128, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108790912))))[name = string("model_model_layers_8_self_attn_v_proj_weight_palettized")];
            tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_8_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108856512))), lut = tensor<fp16, [1024, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134022400))))[name = string("model_model_layers_8_mlp_gate_proj_weight_palettized")];
            tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_8_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134546752))), lut = tensor<fp16, [1024, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159712640))))[name = string("model_model_layers_8_mlp_up_proj_weight_palettized")];
            tensor<fp16, [3072, 8192, 1, 1]> model_model_layers_8_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 8192, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160236992))), lut = tensor<fp16, [384, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185402880))))[name = string("model_model_layers_8_mlp_down_proj_weight_palettized")];
            tensor<fp16, [3072, 3072, 1, 1]> model_model_layers_9_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185599552))), lut = tensor<fp16, [384, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195036800))))[name = string("model_model_layers_9_self_attn_q_proj_weight_palettized")];
            tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_9_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195233472))), lut = tensor<fp16, [128, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198379264))))[name = string("model_model_layers_9_self_attn_k_proj_weight_palettized")];
            tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_9_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198444864))), lut = tensor<fp16, [128, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201590656))))[name = string("model_model_layers_9_self_attn_v_proj_weight_palettized")];
            tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_9_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201656256))), lut = tensor<fp16, [1024, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226822144))))[name = string("model_model_layers_9_mlp_gate_proj_weight_palettized")];
            tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_9_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227346496))), lut = tensor<fp16, [1024, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252512384))))[name = string("model_model_layers_9_mlp_up_proj_weight_palettized")];
            tensor<fp16, [3072, 8192, 1, 1]> model_model_layers_9_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 8192, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253036736))), lut = tensor<fp16, [384, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(278202624))))[name = string("model_model_layers_9_mlp_down_proj_weight_palettized")];
            tensor<fp16, [3072, 3072, 1, 1]> model_model_layers_10_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(278399296))), lut = tensor<fp16, [384, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287836544))))[name = string("model_model_layers_10_self_attn_q_proj_weight_palettized")];
            tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_10_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(288033216))), lut = tensor<fp16, [128, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(291179008))))[name = string("model_model_layers_10_self_attn_k_proj_weight_palettized")];
            tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_10_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(291244608))), lut = tensor<fp16, [128, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294390400))))[name = string("model_model_layers_10_self_attn_v_proj_weight_palettized")];
            tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_10_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294456000))), lut = tensor<fp16, [1024, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319621888))))[name = string("model_model_layers_10_mlp_gate_proj_weight_palettized")];
            tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_10_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320146240))), lut = tensor<fp16, [1024, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345312128))))[name = string("model_model_layers_10_mlp_up_proj_weight_palettized")];
            tensor<fp16, [3072, 8192, 1, 1]> model_model_layers_10_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 8192, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345836480))), lut = tensor<fp16, [384, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371002368))))[name = string("model_model_layers_10_mlp_down_proj_weight_palettized")];
            tensor<fp16, [3072, 3072, 1, 1]> model_model_layers_11_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371199040))), lut = tensor<fp16, [384, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380636288))))[name = string("model_model_layers_11_self_attn_q_proj_weight_palettized")];
            tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_11_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380832960))), lut = tensor<fp16, [128, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383978752))))[name = string("model_model_layers_11_self_attn_k_proj_weight_palettized")];
            tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_11_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384044352))), lut = tensor<fp16, [128, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387190144))))[name = string("model_model_layers_11_self_attn_v_proj_weight_palettized")];
            tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_11_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387255744))), lut = tensor<fp16, [1024, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412421632))))[name = string("model_model_layers_11_mlp_gate_proj_weight_palettized")];
            tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_11_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412945984))), lut = tensor<fp16, [1024, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438111872))))[name = string("model_model_layers_11_mlp_up_proj_weight_palettized")];
            tensor<fp16, [3072, 8192, 1, 1]> model_model_layers_11_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 8192, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438636224))), lut = tensor<fp16, [384, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(463802112))))[name = string("model_model_layers_11_mlp_down_proj_weight_palettized")];
            tensor<fp16, [3072, 3072, 1, 1]> model_model_layers_12_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(463998784))), lut = tensor<fp16, [384, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473436032))))[name = string("model_model_layers_12_self_attn_q_proj_weight_palettized")];
            tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_12_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473632704))), lut = tensor<fp16, [128, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(476778496))))[name = string("model_model_layers_12_self_attn_k_proj_weight_palettized")];
            tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_12_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(476844096))), lut = tensor<fp16, [128, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479989888))))[name = string("model_model_layers_12_self_attn_v_proj_weight_palettized")];
            tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_12_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(480055488))), lut = tensor<fp16, [1024, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505221376))))[name = string("model_model_layers_12_mlp_gate_proj_weight_palettized")];
            tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_12_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505745728))), lut = tensor<fp16, [1024, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530911616))))[name = string("model_model_layers_12_mlp_up_proj_weight_palettized")];
            tensor<fp16, [3072, 8192, 1, 1]> model_model_layers_12_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 8192, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531435968))), lut = tensor<fp16, [384, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556601856))))[name = string("model_model_layers_12_mlp_down_proj_weight_palettized")];
            tensor<fp16, [3072, 3072, 1, 1]> model_model_layers_13_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556798528))), lut = tensor<fp16, [384, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(566235776))))[name = string("model_model_layers_13_self_attn_q_proj_weight_palettized")];
            tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_13_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(566432448))), lut = tensor<fp16, [128, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(569578240))))[name = string("model_model_layers_13_self_attn_k_proj_weight_palettized")];
            tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_13_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(569643840))), lut = tensor<fp16, [128, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(572789632))))[name = string("model_model_layers_13_self_attn_v_proj_weight_palettized")];
            tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_13_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(572855232))), lut = tensor<fp16, [1024, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(598021120))))[name = string("model_model_layers_13_mlp_gate_proj_weight_palettized")];
            tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_13_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(598545472))), lut = tensor<fp16, [1024, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(623711360))))[name = string("model_model_layers_13_mlp_up_proj_weight_palettized")];
            tensor<fp16, [3072, 8192, 1, 1]> model_model_layers_13_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 8192, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(624235712))), lut = tensor<fp16, [384, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(649401600))))[name = string("model_model_layers_13_mlp_down_proj_weight_palettized")];
            int32 var_51 = const()[name = string("op_51"), val = int32(-1)];
            int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)];
            tensor<bool, [1]> greater_equal_0 = greater_equal(x = current_pos, y = greater_equal_0_y_0)[name = string("greater_equal_0")];
            int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(131072)];
            tensor<int32, [1]> add_0 = add(x = current_pos, y = slice_by_index_0)[name = string("add_0")];
            tensor<int32, [1]> select_0 = select(a = current_pos, b = add_0, cond = greater_equal_0)[name = string("select_0")];
            int32 var_235_axis_0 = const()[name = string("op_235_axis_0"), val = int32(1)];
            int32 var_235_batch_dims_0 = const()[name = string("op_235_batch_dims_0"), val = int32(0)];
            bool var_235_validate_indices_0 = const()[name = string("op_235_validate_indices_0"), val = bool(false)];
            tensor<fp16, [1, 131072, 128]> var_56_to_fp16 = const()[name = string("op_56_to_fp16"), val = tensor<fp16, [1, 131072, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(649598272)))];
            tensor<fp16, [1, 1, 128]> var_235_cast_fp16 = gather(axis = var_235_axis_0, batch_dims = var_235_batch_dims_0, indices = select_0, validate_indices = var_235_validate_indices_0, x = var_56_to_fp16)[name = string("op_235_cast_fp16")];
            tensor<int32, [4]> var_236 = const()[name = string("op_236"), val = tensor<int32, [4]>([1, 1, 1, -1])];
            tensor<fp16, [1, 1, 1, 128]> sin_1_cast_fp16 = reshape(shape = var_236, x = var_235_cast_fp16)[name = string("sin_1_cast_fp16")];
            int32 var_240_axis_0 = const()[name = string("op_240_axis_0"), val = int32(1)];
            int32 var_240_batch_dims_0 = const()[name = string("op_240_batch_dims_0"), val = int32(0)];
            bool var_240_validate_indices_0 = const()[name = string("op_240_validate_indices_0"), val = bool(false)];
            tensor<fp16, [1, 131072, 128]> var_50_to_fp16 = const()[name = string("op_50_to_fp16"), val = tensor<fp16, [1, 131072, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(683152768)))];
            tensor<fp16, [1, 1, 128]> var_240_cast_fp16 = gather(axis = var_240_axis_0, batch_dims = var_240_batch_dims_0, indices = select_0, validate_indices = var_240_validate_indices_0, x = var_50_to_fp16)[name = string("op_240_cast_fp16")];
            tensor<int32, [4]> var_241 = const()[name = string("op_241"), val = tensor<int32, [4]>([1, 1, 1, -1])];
            tensor<fp16, [1, 1, 1, 128]> cos_1_cast_fp16 = reshape(shape = var_241, x = var_240_cast_fp16)[name = string("cos_1_cast_fp16")];
            tensor<int32, [1]> mean_1_axes_0 = const()[name = string("mean_1_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_1_keep_dims_0 = const()[name = string("mean_1_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 1, 1]> mean_1_cast_fp16 = reduce_mean(axes = mean_1_axes_0, keep_dims = mean_1_keep_dims_0, x = hidden_states)[name = string("mean_1_cast_fp16")];
            tensor<fp16, [1, 1, 3072]> input_1_cast_fp16 = sub(x = hidden_states, y = mean_1_cast_fp16)[name = string("input_1_cast_fp16")];
            tensor<int32, [1]> var_249_axes_0 = const()[name = string("op_249_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [3072]> model_model_layers_7_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_input_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716707264)))];
            fp16 var_46_to_fp16 = const()[name = string("op_46_to_fp16"), val = fp16(0x1.5p-17)];
            tensor<fp16, [1, 1, 3072]> var_249_cast_fp16 = layer_norm(axes = var_249_axes_0, epsilon = var_46_to_fp16, gamma = model_model_layers_7_input_layernorm_weight_to_fp16, x = input_1_cast_fp16)[name = string("op_249_cast_fp16")];
            tensor<int32, [3]> var_252 = const()[name = string("op_252"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> var_254_axes_0 = const()[name = string("op_254_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 1]> var_253 = transpose(perm = var_252, x = var_249_cast_fp16)[name = string("transpose_27")];
            tensor<fp16, [1, 3072, 1, 1]> var_254 = expand_dims(axes = var_254_axes_0, x = var_253)[name = string("op_254")];
            string var_261_pad_type_0 = const()[name = string("op_261_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> var_261_strides_0 = const()[name = string("op_261_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> var_261_pad_0 = const()[name = string("op_261_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> var_261_dilations_0 = const()[name = string("op_261_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 var_261_groups_0 = const()[name = string("op_261_groups_0"), val = int32(1)];
            tensor<fp16, [1, 3072, 1, 1]> var_261 = conv(dilations = var_261_dilations_0, groups = var_261_groups_0, pad = var_261_pad_0, pad_type = var_261_pad_type_0, strides = var_261_strides_0, weight = model_model_layers_7_self_attn_q_proj_weight_palettized, x = var_254)[name = string("op_261")];
            tensor<int32, [4]> var_262 = const()[name = string("op_262"), val = tensor<int32, [4]>([1, 24, 1, 128])];
            tensor<fp16, [1, 24, 1, 128]> var_263 = reshape(shape = var_262, x = var_261)[name = string("op_263")];
            string var_270_pad_type_0 = const()[name = string("op_270_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> var_270_strides_0 = const()[name = string("op_270_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> var_270_pad_0 = const()[name = string("op_270_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> var_270_dilations_0 = const()[name = string("op_270_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 var_270_groups_0 = const()[name = string("op_270_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 1]> var_270 = conv(dilations = var_270_dilations_0, groups = var_270_groups_0, pad = var_270_pad_0, pad_type = var_270_pad_type_0, strides = var_270_strides_0, weight = model_model_layers_7_self_attn_k_proj_weight_palettized, x = var_254)[name = string("op_270")];
            tensor<int32, [4]> var_271 = const()[name = string("op_271"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<fp16, [1, 8, 1, 128]> var_272 = reshape(shape = var_271, x = var_270)[name = string("op_272")];
            string var_279_pad_type_0 = const()[name = string("op_279_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> var_279_strides_0 = const()[name = string("op_279_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> var_279_pad_0 = const()[name = string("op_279_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> var_279_dilations_0 = const()[name = string("op_279_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 var_279_groups_0 = const()[name = string("op_279_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 1]> var_279 = conv(dilations = var_279_dilations_0, groups = var_279_groups_0, pad = var_279_pad_0, pad_type = var_279_pad_type_0, strides = var_279_strides_0, weight = model_model_layers_7_self_attn_v_proj_weight_palettized, x = var_254)[name = string("op_279")];
            tensor<int32, [4]> var_280 = const()[name = string("op_280"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<fp16, [1, 8, 1, 128]> var_281 = reshape(shape = var_280, x = var_279)[name = string("op_281")];
            tensor<int32, [4]> x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor<int32, [4]>([1, 24, 1, 64])];
            tensor<bool, [4]> x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 24, 1, 64]> x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = var_263)[name = string("x1_1")];
            tensor<int32, [4]> x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor<int32, [4]>([1, 24, 1, 128])];
            tensor<bool, [4]> x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 24, 1, 64]> x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = var_263)[name = string("x2_1")];
            tensor<int32, [4]> cos_3_begin_0 = const()[name = string("cos_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> cos_3_end_0 = const()[name = string("cos_3_end_0"), val = tensor<int32, [4]>([1, 1, 1, 64])];
            tensor<bool, [4]> cos_3_end_mask_0 = const()[name = string("cos_3_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 1, 1, 64]> cos_3_cast_fp16 = slice_by_index(begin = cos_3_begin_0, end = cos_3_end_0, end_mask = cos_3_end_mask_0, x = cos_1_cast_fp16)[name = string("cos_3_cast_fp16")];
            tensor<int32, [4]> sin_3_begin_0 = const()[name = string("sin_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> sin_3_end_0 = const()[name = string("sin_3_end_0"), val = tensor<int32, [4]>([1, 1, 1, 64])];
            tensor<bool, [4]> sin_3_end_mask_0 = const()[name = string("sin_3_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 1, 1, 64]> sin_3_cast_fp16 = slice_by_index(begin = sin_3_begin_0, end = sin_3_end_0, end_mask = sin_3_end_mask_0, x = sin_1_cast_fp16)[name = string("sin_3_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_295_cast_fp16 = mul(x = x1_1, y = cos_3_cast_fp16)[name = string("op_295_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_296_cast_fp16 = mul(x = x2_1, y = sin_3_cast_fp16)[name = string("op_296_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_297_cast_fp16 = sub(x = var_295_cast_fp16, y = var_296_cast_fp16)[name = string("op_297_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_298_cast_fp16 = mul(x = x2_1, y = cos_3_cast_fp16)[name = string("op_298_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_299_cast_fp16 = mul(x = x1_1, y = sin_3_cast_fp16)[name = string("op_299_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_300_cast_fp16 = add(x = var_298_cast_fp16, y = var_299_cast_fp16)[name = string("op_300_cast_fp16")];
            bool rotated_1_interleave_0 = const()[name = string("rotated_1_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 24, 1, 128]> rotated_1_cast_fp16 = concat(axis = var_51, interleave = rotated_1_interleave_0, values = (var_297_cast_fp16, var_300_cast_fp16))[name = string("rotated_1_cast_fp16")];
            tensor<int32, [4]> x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor<int32, [4]>([1, 8, 1, 64])];
            tensor<bool, [4]> x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 8, 1, 64]> x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = var_272)[name = string("x1_3")];
            tensor<int32, [4]> x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<bool, [4]> x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 8, 1, 64]> x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = var_272)[name = string("x2_3")];
            tensor<fp16, [1, 8, 1, 64]> var_316_cast_fp16 = mul(x = x1_3, y = cos_3_cast_fp16)[name = string("op_316_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_317_cast_fp16 = mul(x = x2_3, y = sin_3_cast_fp16)[name = string("op_317_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_318_cast_fp16 = sub(x = var_316_cast_fp16, y = var_317_cast_fp16)[name = string("op_318_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_319_cast_fp16 = mul(x = x2_3, y = cos_3_cast_fp16)[name = string("op_319_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_320_cast_fp16 = mul(x = x1_3, y = sin_3_cast_fp16)[name = string("op_320_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_321_cast_fp16 = add(x = var_319_cast_fp16, y = var_320_cast_fp16)[name = string("op_321_cast_fp16")];
            bool rotated_3_interleave_0 = const()[name = string("rotated_3_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 8, 1, 128]> rotated_3_cast_fp16 = concat(axis = var_51, interleave = rotated_3_interleave_0, values = (var_318_cast_fp16, var_321_cast_fp16))[name = string("rotated_3_cast_fp16")];
            int32 var_325 = const()[name = string("op_325"), val = int32(1)];
            tensor<int32, [1]> var_326 = add(x = current_pos, y = var_325)[name = string("op_326")];
            tensor<fp16, [56, 8, 1024, 128]> read_state_0 = read_state(input = model_model_kv_cache_0)[name = string("read_state_0")];
            tensor<int32, [1]> expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor<int32, [1]>([7])];
            tensor<int32, [1]> expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor<int32, [1]>([8])];
            int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)];
            bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")];
            tensor<int32, [1]> concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)];
            bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_326, concat_3_values3_0))[name = string("concat_3")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_1_stride_0, update = rotated_3_cast_fp16, x = read_state_0)[name = string("model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_0_write_state")];
            tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_14 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_0")];
            tensor<int32, [1]> expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor<int32, [1]>([35])];
            tensor<int32, [1]> expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor<int32, [1]>([36])];
            int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)];
            bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")];
            tensor<int32, [1]> concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)];
            bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_326, concat_7_values3_0))[name = string("concat_7")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_2_stride_0, update = var_281, x = coreml_update_state_14)[name = string("model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_1_write_state")];
            tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_15 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_1")];
            tensor<int32, [4]> var_341_begin_0 = const()[name = string("op_341_begin_0"), val = tensor<int32, [4]>([7, 0, 0, 0])];
            tensor<int32, [4]> var_341_end_0 = const()[name = string("op_341_end_0"), val = tensor<int32, [4]>([8, 8, 1024, 128])];
            tensor<bool, [4]> var_341_end_mask_0 = const()[name = string("op_341_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_341_cast_fp16 = slice_by_index(begin = var_341_begin_0, end = var_341_end_0, end_mask = var_341_end_mask_0, x = coreml_update_state_15)[name = string("op_341_cast_fp16")];
            tensor<int32, [1]> K_layer_cache_1_axes_0 = const()[name = string("K_layer_cache_1_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> K_layer_cache_1_cast_fp16 = squeeze(axes = K_layer_cache_1_axes_0, x = var_341_cast_fp16)[name = string("K_layer_cache_1_cast_fp16")];
            tensor<int32, [4]> var_343_begin_0 = const()[name = string("op_343_begin_0"), val = tensor<int32, [4]>([35, 0, 0, 0])];
            tensor<int32, [4]> var_343_end_0 = const()[name = string("op_343_end_0"), val = tensor<int32, [4]>([36, 8, 1024, 128])];
            tensor<bool, [4]> var_343_end_mask_0 = const()[name = string("op_343_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_343_cast_fp16 = slice_by_index(begin = var_343_begin_0, end = var_343_end_0, end_mask = var_343_end_mask_0, x = coreml_update_state_15)[name = string("op_343_cast_fp16")];
            tensor<int32, [1]> V_layer_cache_1_axes_0 = const()[name = string("V_layer_cache_1_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> V_layer_cache_1_cast_fp16 = squeeze(axes = V_layer_cache_1_axes_0, x = var_343_cast_fp16)[name = string("V_layer_cache_1_cast_fp16")];
            tensor<int32, [1]> x_11_axes_0 = const()[name = string("x_11_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_11_cast_fp16 = expand_dims(axes = x_11_axes_0, x = K_layer_cache_1_cast_fp16)[name = string("x_11_cast_fp16")];
            tensor<int32, [4]> var_352 = const()[name = string("op_352"), val = tensor<int32, [4]>([1, 3, 1, 1])];
            tensor<fp16, [8, 3, 1024, 128]> x_13_cast_fp16 = tile(reps = var_352, x = x_11_cast_fp16)[name = string("x_13_cast_fp16")];
            tensor<int32, [4]> var_356 = const()[name = string("op_356"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
            tensor<fp16, [1, 24, 1024, 128]> key_states_3_cast_fp16 = reshape(shape = var_356, x = x_13_cast_fp16)[name = string("key_states_3_cast_fp16")];
            tensor<int32, [1]> x_17_axes_0 = const()[name = string("x_17_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_17_cast_fp16 = expand_dims(axes = x_17_axes_0, x = V_layer_cache_1_cast_fp16)[name = string("x_17_cast_fp16")];
            tensor<int32, [4]> var_359 = const()[name = string("op_359"), val = tensor<int32, [4]>([1, 3, 1, 1])];
            tensor<fp16, [8, 3, 1024, 128]> x_19_cast_fp16 = tile(reps = var_359, x = x_17_cast_fp16)[name = string("x_19_cast_fp16")];
            tensor<int32, [4]> var_363 = const()[name = string("op_363"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
            tensor<fp16, [1, 24, 1024, 128]> value_states_3_cast_fp16 = reshape(shape = var_363, x = x_19_cast_fp16)[name = string("value_states_3_cast_fp16")];
            bool var_366_transpose_x_1 = const()[name = string("op_366_transpose_x_1"), val = bool(false)];
            bool var_366_transpose_y_1 = const()[name = string("op_366_transpose_y_1"), val = bool(true)];
            tensor<fp16, [1, 24, 1, 1024]> var_366_cast_fp16 = matmul(transpose_x = var_366_transpose_x_1, transpose_y = var_366_transpose_y_1, x = rotated_1_cast_fp16, y = key_states_3_cast_fp16)[name = string("op_366_cast_fp16")];
            fp16 var_367_to_fp16 = const()[name = string("op_367_to_fp16"), val = fp16(0x1.6ap-4)];
            tensor<fp16, [1, 24, 1, 1024]> attn_weights_1_cast_fp16 = mul(x = var_366_cast_fp16, y = var_367_to_fp16)[name = string("attn_weights_1_cast_fp16")];
            tensor<fp16, [1, 24, 1, 1024]> x_21_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("x_21_cast_fp16")];
            tensor<int32, [1]> reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor<int32, [1]>([-1])];
            bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 24, 1, 1]> reduce_max_0_cast_fp16 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_21_cast_fp16)[name = string("reduce_max_0_cast_fp16")];
            tensor<fp16, [1, 24, 1, 1024]> x_23_cast_fp16 = sub(x = x_21_cast_fp16, y = reduce_max_0_cast_fp16)[name = string("x_23_cast_fp16")];
            tensor<fp16, [1, 24, 1, 1024]> exp_x_1_cast_fp16 = exp(x = x_23_cast_fp16)[name = string("exp_x_1_cast_fp16")];
            tensor<int32, [1]> var_378_axes_0 = const()[name = string("op_378_axes_0"), val = tensor<int32, [1]>([-1])];
            bool var_378_keep_dims_0 = const()[name = string("op_378_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 24, 1, 1]> var_378_cast_fp16 = reduce_sum(axes = var_378_axes_0, keep_dims = var_378_keep_dims_0, x = exp_x_1_cast_fp16)[name = string("op_378_cast_fp16")];
            tensor<fp16, [1, 24, 1, 1024]> attn_weights_3_cast_fp16 = real_div(x = exp_x_1_cast_fp16, y = var_378_cast_fp16)[name = string("attn_weights_3_cast_fp16")];
            bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)];
            bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)];
            tensor<fp16, [1, 24, 1, 128]> attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = attn_weights_3_cast_fp16, y = value_states_3_cast_fp16)[name = string("attn_output_1_cast_fp16")];
            tensor<int32, [4]> var_381_perm_0 = const()[name = string("op_381_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_383 = const()[name = string("op_383"), val = tensor<int32, [3]>([1, 1, 3072])];
            tensor<fp16, [1, 1, 24, 128]> var_381_cast_fp16 = transpose(perm = var_381_perm_0, x = attn_output_1_cast_fp16)[name = string("transpose_26")];
            tensor<fp16, [1, 1, 3072]> input_5_cast_fp16 = reshape(shape = var_383, x = var_381_cast_fp16)[name = string("input_5_cast_fp16")];
            tensor<fp16, [3072, 3072]> model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716713472))), lut = tensor<fp16, [384, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(726150720))))[name = string("model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
            tensor<fp16, [3072]> linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(726347392)))];
            tensor<fp16, [1, 1, 3072]> linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_5_cast_fp16)[name = string("linear_0_cast_fp16")];
            tensor<fp16, [1, 1, 3072]> hidden_states_5_cast_fp16 = add(x = hidden_states, y = linear_0_cast_fp16)[name = string("hidden_states_5_cast_fp16")];
            tensor<int32, [1]> mean_3_axes_0 = const()[name = string("mean_3_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_3_keep_dims_0 = const()[name = string("mean_3_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 1, 1]> mean_3_cast_fp16 = reduce_mean(axes = mean_3_axes_0, keep_dims = mean_3_keep_dims_0, x = hidden_states_5_cast_fp16)[name = string("mean_3_cast_fp16")];
            tensor<fp16, [1, 1, 3072]> input_7_cast_fp16 = sub(x = hidden_states_5_cast_fp16, y = mean_3_cast_fp16)[name = string("input_7_cast_fp16")];
            tensor<int32, [1]> var_394_axes_0 = const()[name = string("op_394_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [3072]> model_model_layers_7_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(726353600)))];
            tensor<fp16, [1, 1, 3072]> var_394_cast_fp16 = layer_norm(axes = var_394_axes_0, epsilon = var_46_to_fp16, gamma = model_model_layers_7_post_attention_layernorm_weight_to_fp16, x = input_7_cast_fp16)[name = string("op_394_cast_fp16")];
            tensor<int32, [3]> var_401 = const()[name = string("op_401"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> input_9_axes_0 = const()[name = string("input_9_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 1]> var_402 = transpose(perm = var_401, x = var_394_cast_fp16)[name = string("transpose_25")];
            tensor<fp16, [1, 3072, 1, 1]> input_9 = expand_dims(axes = input_9_axes_0, x = var_402)[name = string("input_9")];
            string input_11_pad_type_0 = const()[name = string("input_11_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> input_11_strides_0 = const()[name = string("input_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> input_11_pad_0 = const()[name = string("input_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> input_11_dilations_0 = const()[name = string("input_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 input_11_groups_0 = const()[name = string("input_11_groups_0"), val = int32(1)];
            tensor<fp16, [1, 8192, 1, 1]> input_11 = conv(dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = model_model_layers_7_mlp_gate_proj_weight_palettized, x = input_9)[name = string("input_11")];
            string up_states_1_pad_type_0 = const()[name = string("up_states_1_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> up_states_1_strides_0 = const()[name = string("up_states_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> up_states_1_pad_0 = const()[name = string("up_states_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> up_states_1_dilations_0 = const()[name = string("up_states_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 up_states_1_groups_0 = const()[name = string("up_states_1_groups_0"), val = int32(1)];
            tensor<fp16, [1, 8192, 1, 1]> up_states_1 = conv(dilations = up_states_1_dilations_0, groups = up_states_1_groups_0, pad = up_states_1_pad_0, pad_type = up_states_1_pad_type_0, strides = up_states_1_strides_0, weight = model_model_layers_7_mlp_up_proj_weight_palettized, x = input_9)[name = string("up_states_1")];
            tensor<fp16, [1, 8192, 1, 1]> gate_states_1 = silu(x = input_11)[name = string("gate_states_1")];
            tensor<fp16, [1, 8192, 1, 1]> input_13 = mul(x = gate_states_1, y = up_states_1)[name = string("input_13")];
            string hidden_states_7_pad_type_0 = const()[name = string("hidden_states_7_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> hidden_states_7_strides_0 = const()[name = string("hidden_states_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> hidden_states_7_pad_0 = const()[name = string("hidden_states_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> hidden_states_7_dilations_0 = const()[name = string("hidden_states_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 hidden_states_7_groups_0 = const()[name = string("hidden_states_7_groups_0"), val = int32(1)];
            tensor<fp16, [1, 3072, 1, 1]> hidden_states_7 = conv(dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = model_model_layers_7_mlp_down_proj_weight_palettized, x = input_13)[name = string("hidden_states_7")];
            tensor<int32, [1]> var_424_axes_0 = const()[name = string("op_424_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 1]> var_424 = squeeze(axes = var_424_axes_0, x = hidden_states_7)[name = string("op_424")];
            tensor<int32, [3]> var_425 = const()[name = string("op_425"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 3072]> var_426 = transpose(perm = var_425, x = var_424)[name = string("transpose_24")];
            tensor<fp16, [1, 1, 3072]> hidden_states_9_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = var_426)[name = string("hidden_states_9_cast_fp16")];
            tensor<int32, [1]> mean_5_axes_0 = const()[name = string("mean_5_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_5_keep_dims_0 = const()[name = string("mean_5_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 1, 1]> mean_5_cast_fp16 = reduce_mean(axes = mean_5_axes_0, keep_dims = mean_5_keep_dims_0, x = hidden_states_9_cast_fp16)[name = string("mean_5_cast_fp16")];
            tensor<fp16, [1, 1, 3072]> input_15_cast_fp16 = sub(x = hidden_states_9_cast_fp16, y = mean_5_cast_fp16)[name = string("input_15_cast_fp16")];
            tensor<int32, [1]> var_434_axes_0 = const()[name = string("op_434_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [3072]> model_model_layers_8_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_input_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(726359808)))];
            tensor<fp16, [1, 1, 3072]> var_434_cast_fp16 = layer_norm(axes = var_434_axes_0, epsilon = var_46_to_fp16, gamma = model_model_layers_8_input_layernorm_weight_to_fp16, x = input_15_cast_fp16)[name = string("op_434_cast_fp16")];
            tensor<int32, [3]> var_437 = const()[name = string("op_437"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> var_439_axes_0 = const()[name = string("op_439_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 1]> var_438 = transpose(perm = var_437, x = var_434_cast_fp16)[name = string("transpose_23")];
            tensor<fp16, [1, 3072, 1, 1]> var_439 = expand_dims(axes = var_439_axes_0, x = var_438)[name = string("op_439")];
            string var_446_pad_type_0 = const()[name = string("op_446_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> var_446_strides_0 = const()[name = string("op_446_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> var_446_pad_0 = const()[name = string("op_446_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> var_446_dilations_0 = const()[name = string("op_446_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 var_446_groups_0 = const()[name = string("op_446_groups_0"), val = int32(1)];
            tensor<fp16, [1, 3072, 1, 1]> var_446 = conv(dilations = var_446_dilations_0, groups = var_446_groups_0, pad = var_446_pad_0, pad_type = var_446_pad_type_0, strides = var_446_strides_0, weight = model_model_layers_8_self_attn_q_proj_weight_palettized, x = var_439)[name = string("op_446")];
            tensor<int32, [4]> var_447 = const()[name = string("op_447"), val = tensor<int32, [4]>([1, 24, 1, 128])];
            tensor<fp16, [1, 24, 1, 128]> var_448 = reshape(shape = var_447, x = var_446)[name = string("op_448")];
            string var_455_pad_type_0 = const()[name = string("op_455_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> var_455_strides_0 = const()[name = string("op_455_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> var_455_pad_0 = const()[name = string("op_455_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> var_455_dilations_0 = const()[name = string("op_455_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 var_455_groups_0 = const()[name = string("op_455_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 1]> var_455 = conv(dilations = var_455_dilations_0, groups = var_455_groups_0, pad = var_455_pad_0, pad_type = var_455_pad_type_0, strides = var_455_strides_0, weight = model_model_layers_8_self_attn_k_proj_weight_palettized, x = var_439)[name = string("op_455")];
            tensor<int32, [4]> var_456 = const()[name = string("op_456"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<fp16, [1, 8, 1, 128]> var_457 = reshape(shape = var_456, x = var_455)[name = string("op_457")];
            string var_464_pad_type_0 = const()[name = string("op_464_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> var_464_strides_0 = const()[name = string("op_464_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> var_464_pad_0 = const()[name = string("op_464_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> var_464_dilations_0 = const()[name = string("op_464_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 var_464_groups_0 = const()[name = string("op_464_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 1]> var_464 = conv(dilations = var_464_dilations_0, groups = var_464_groups_0, pad = var_464_pad_0, pad_type = var_464_pad_type_0, strides = var_464_strides_0, weight = model_model_layers_8_self_attn_v_proj_weight_palettized, x = var_439)[name = string("op_464")];
            tensor<int32, [4]> var_465 = const()[name = string("op_465"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<fp16, [1, 8, 1, 128]> var_466 = reshape(shape = var_465, x = var_464)[name = string("op_466")];
            tensor<int32, [4]> x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor<int32, [4]>([1, 24, 1, 64])];
            tensor<bool, [4]> x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 24, 1, 64]> x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = var_448)[name = string("x1_5")];
            tensor<int32, [4]> x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor<int32, [4]>([1, 24, 1, 128])];
            tensor<bool, [4]> x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 24, 1, 64]> x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = var_448)[name = string("x2_5")];
            tensor<fp16, [1, 24, 1, 64]> var_480_cast_fp16 = mul(x = x1_5, y = cos_3_cast_fp16)[name = string("op_480_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_481_cast_fp16 = mul(x = x2_5, y = sin_3_cast_fp16)[name = string("op_481_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_482_cast_fp16 = sub(x = var_480_cast_fp16, y = var_481_cast_fp16)[name = string("op_482_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_483_cast_fp16 = mul(x = x2_5, y = cos_3_cast_fp16)[name = string("op_483_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_484_cast_fp16 = mul(x = x1_5, y = sin_3_cast_fp16)[name = string("op_484_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_485_cast_fp16 = add(x = var_483_cast_fp16, y = var_484_cast_fp16)[name = string("op_485_cast_fp16")];
            bool rotated_5_interleave_0 = const()[name = string("rotated_5_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 24, 1, 128]> rotated_5_cast_fp16 = concat(axis = var_51, interleave = rotated_5_interleave_0, values = (var_482_cast_fp16, var_485_cast_fp16))[name = string("rotated_5_cast_fp16")];
            tensor<int32, [4]> x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor<int32, [4]>([1, 8, 1, 64])];
            tensor<bool, [4]> x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 8, 1, 64]> x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = var_457)[name = string("x1_7")];
            tensor<int32, [4]> x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<bool, [4]> x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 8, 1, 64]> x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = var_457)[name = string("x2_7")];
            tensor<fp16, [1, 8, 1, 64]> var_501_cast_fp16 = mul(x = x1_7, y = cos_3_cast_fp16)[name = string("op_501_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_502_cast_fp16 = mul(x = x2_7, y = sin_3_cast_fp16)[name = string("op_502_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_503_cast_fp16 = sub(x = var_501_cast_fp16, y = var_502_cast_fp16)[name = string("op_503_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_504_cast_fp16 = mul(x = x2_7, y = cos_3_cast_fp16)[name = string("op_504_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_505_cast_fp16 = mul(x = x1_7, y = sin_3_cast_fp16)[name = string("op_505_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_506_cast_fp16 = add(x = var_504_cast_fp16, y = var_505_cast_fp16)[name = string("op_506_cast_fp16")];
            bool rotated_7_interleave_0 = const()[name = string("rotated_7_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 8, 1, 128]> rotated_7_cast_fp16 = concat(axis = var_51, interleave = rotated_7_interleave_0, values = (var_503_cast_fp16, var_506_cast_fp16))[name = string("rotated_7_cast_fp16")];
            tensor<int32, [1]> expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor<int32, [1]>([8])];
            tensor<int32, [1]> expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor<int32, [1]>([9])];
            int32 concat_10_axis_0 = const()[name = string("concat_10_axis_0"), val = int32(0)];
            bool concat_10_interleave_0 = const()[name = string("concat_10_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_10 = concat(axis = concat_10_axis_0, interleave = concat_10_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_10")];
            tensor<int32, [1]> concat_11_values1_0 = const()[name = string("concat_11_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)];
            bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (expand_dims_16, concat_11_values1_0, var_326, concat_11_values3_0))[name = string("concat_11")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_10, begin_mask = model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_11, end_mask = model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_3_stride_0, update = rotated_7_cast_fp16, x = coreml_update_state_15)[name = string("model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_2_write_state")];
            tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_16 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_2")];
            tensor<int32, [1]> expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor<int32, [1]>([36])];
            tensor<int32, [1]> expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor<int32, [1]>([37])];
            int32 concat_14_axis_0 = const()[name = string("concat_14_axis_0"), val = int32(0)];
            bool concat_14_interleave_0 = const()[name = string("concat_14_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_14 = concat(axis = concat_14_axis_0, interleave = concat_14_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_14")];
            tensor<int32, [1]> concat_15_values1_0 = const()[name = string("concat_15_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_15_values3_0 = const()[name = string("concat_15_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)];
            bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (expand_dims_22, concat_15_values1_0, var_326, concat_15_values3_0))[name = string("concat_15")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_14, begin_mask = model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_15, end_mask = model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_4_stride_0, update = var_466, x = coreml_update_state_16)[name = string("model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_3_write_state")];
            tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_17 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_3")];
            tensor<int32, [4]> var_526_begin_0 = const()[name = string("op_526_begin_0"), val = tensor<int32, [4]>([8, 0, 0, 0])];
            tensor<int32, [4]> var_526_end_0 = const()[name = string("op_526_end_0"), val = tensor<int32, [4]>([9, 8, 1024, 128])];
            tensor<bool, [4]> var_526_end_mask_0 = const()[name = string("op_526_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_526_cast_fp16 = slice_by_index(begin = var_526_begin_0, end = var_526_end_0, end_mask = var_526_end_mask_0, x = coreml_update_state_17)[name = string("op_526_cast_fp16")];
            tensor<int32, [1]> K_layer_cache_3_axes_0 = const()[name = string("K_layer_cache_3_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> K_layer_cache_3_cast_fp16 = squeeze(axes = K_layer_cache_3_axes_0, x = var_526_cast_fp16)[name = string("K_layer_cache_3_cast_fp16")];
            tensor<int32, [4]> var_528_begin_0 = const()[name = string("op_528_begin_0"), val = tensor<int32, [4]>([36, 0, 0, 0])];
            tensor<int32, [4]> var_528_end_0 = const()[name = string("op_528_end_0"), val = tensor<int32, [4]>([37, 8, 1024, 128])];
            tensor<bool, [4]> var_528_end_mask_0 = const()[name = string("op_528_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_528_cast_fp16 = slice_by_index(begin = var_528_begin_0, end = var_528_end_0, end_mask = var_528_end_mask_0, x = coreml_update_state_17)[name = string("op_528_cast_fp16")];
            tensor<int32, [1]> V_layer_cache_3_axes_0 = const()[name = string("V_layer_cache_3_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> V_layer_cache_3_cast_fp16 = squeeze(axes = V_layer_cache_3_axes_0, x = var_528_cast_fp16)[name = string("V_layer_cache_3_cast_fp16")];
            tensor<int32, [1]> x_39_axes_0 = const()[name = string("x_39_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_39_cast_fp16 = expand_dims(axes = x_39_axes_0, x = K_layer_cache_3_cast_fp16)[name = string("x_39_cast_fp16")];
            tensor<int32, [4]> var_537 = const()[name = string("op_537"), val = tensor<int32, [4]>([1, 3, 1, 1])];
            tensor<fp16, [8, 3, 1024, 128]> x_41_cast_fp16 = tile(reps = var_537, x = x_39_cast_fp16)[name = string("x_41_cast_fp16")];
            tensor<int32, [4]> var_541 = const()[name = string("op_541"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
            tensor<fp16, [1, 24, 1024, 128]> key_states_7_cast_fp16 = reshape(shape = var_541, x = x_41_cast_fp16)[name = string("key_states_7_cast_fp16")];
            tensor<int32, [1]> x_45_axes_0 = const()[name = string("x_45_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_45_cast_fp16 = expand_dims(axes = x_45_axes_0, x = V_layer_cache_3_cast_fp16)[name = string("x_45_cast_fp16")];
            tensor<int32, [4]> var_544 = const()[name = string("op_544"), val = tensor<int32, [4]>([1, 3, 1, 1])];
            tensor<fp16, [8, 3, 1024, 128]> x_47_cast_fp16 = tile(reps = var_544, x = x_45_cast_fp16)[name = string("x_47_cast_fp16")];
            tensor<int32, [4]> var_548 = const()[name = string("op_548"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
            tensor<fp16, [1, 24, 1024, 128]> value_states_7_cast_fp16 = reshape(shape = var_548, x = x_47_cast_fp16)[name = string("value_states_7_cast_fp16")];
            bool var_551_transpose_x_1 = const()[name = string("op_551_transpose_x_1"), val = bool(false)];
            bool var_551_transpose_y_1 = const()[name = string("op_551_transpose_y_1"), val = bool(true)];
            tensor<fp16, [1, 24, 1, 1024]> var_551_cast_fp16 = matmul(transpose_x = var_551_transpose_x_1, transpose_y = var_551_transpose_y_1, x = rotated_5_cast_fp16, y = key_states_7_cast_fp16)[name = string("op_551_cast_fp16")];
            fp16 var_552_to_fp16 = const()[name = string("op_552_to_fp16"), val = fp16(0x1.6ap-4)];
            tensor<fp16, [1, 24, 1, 1024]> attn_weights_5_cast_fp16 = mul(x = var_551_cast_fp16, y = var_552_to_fp16)[name = string("attn_weights_5_cast_fp16")];
            tensor<fp16, [1, 24, 1, 1024]> x_49_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask)[name = string("x_49_cast_fp16")];
            tensor<int32, [1]> reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor<int32, [1]>([-1])];
            bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 24, 1, 1]> reduce_max_1_cast_fp16 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_49_cast_fp16)[name = string("reduce_max_1_cast_fp16")];
            tensor<fp16, [1, 24, 1, 1024]> x_51_cast_fp16 = sub(x = x_49_cast_fp16, y = reduce_max_1_cast_fp16)[name = string("x_51_cast_fp16")];
            tensor<fp16, [1, 24, 1, 1024]> exp_x_3_cast_fp16 = exp(x = x_51_cast_fp16)[name = string("exp_x_3_cast_fp16")];
            tensor<int32, [1]> var_563_axes_0 = const()[name = string("op_563_axes_0"), val = tensor<int32, [1]>([-1])];
            bool var_563_keep_dims_0 = const()[name = string("op_563_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 24, 1, 1]> var_563_cast_fp16 = reduce_sum(axes = var_563_axes_0, keep_dims = var_563_keep_dims_0, x = exp_x_3_cast_fp16)[name = string("op_563_cast_fp16")];
            tensor<fp16, [1, 24, 1, 1024]> attn_weights_7_cast_fp16 = real_div(x = exp_x_3_cast_fp16, y = var_563_cast_fp16)[name = string("attn_weights_7_cast_fp16")];
            bool attn_output_7_transpose_x_0 = const()[name = string("attn_output_7_transpose_x_0"), val = bool(false)];
            bool attn_output_7_transpose_y_0 = const()[name = string("attn_output_7_transpose_y_0"), val = bool(false)];
            tensor<fp16, [1, 24, 1, 128]> attn_output_7_cast_fp16 = matmul(transpose_x = attn_output_7_transpose_x_0, transpose_y = attn_output_7_transpose_y_0, x = attn_weights_7_cast_fp16, y = value_states_7_cast_fp16)[name = string("attn_output_7_cast_fp16")];
            tensor<int32, [4]> var_566_perm_0 = const()[name = string("op_566_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_568 = const()[name = string("op_568"), val = tensor<int32, [3]>([1, 1, 3072])];
            tensor<fp16, [1, 1, 24, 128]> var_566_cast_fp16 = transpose(perm = var_566_perm_0, x = attn_output_7_cast_fp16)[name = string("transpose_22")];
            tensor<fp16, [1, 1, 3072]> input_19_cast_fp16 = reshape(shape = var_568, x = var_566_cast_fp16)[name = string("input_19_cast_fp16")];
            tensor<fp16, [3072, 3072]> model_model_layers_8_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(726366016))), lut = tensor<fp16, [384, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735803264))))[name = string("model_model_layers_8_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
            tensor<fp16, [1, 1, 3072]> linear_1_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_8_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_19_cast_fp16)[name = string("linear_1_cast_fp16")];
            tensor<fp16, [1, 1, 3072]> hidden_states_13_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = linear_1_cast_fp16)[name = string("hidden_states_13_cast_fp16")];
            tensor<int32, [1]> mean_7_axes_0 = const()[name = string("mean_7_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_7_keep_dims_0 = const()[name = string("mean_7_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 1, 1]> mean_7_cast_fp16 = reduce_mean(axes = mean_7_axes_0, keep_dims = mean_7_keep_dims_0, x = hidden_states_13_cast_fp16)[name = string("mean_7_cast_fp16")];
            tensor<fp16, [1, 1, 3072]> input_21_cast_fp16 = sub(x = hidden_states_13_cast_fp16, y = mean_7_cast_fp16)[name = string("input_21_cast_fp16")];
            tensor<int32, [1]> var_579_axes_0 = const()[name = string("op_579_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [3072]> model_model_layers_8_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735999936)))];
            tensor<fp16, [1, 1, 3072]> var_579_cast_fp16 = layer_norm(axes = var_579_axes_0, epsilon = var_46_to_fp16, gamma = model_model_layers_8_post_attention_layernorm_weight_to_fp16, x = input_21_cast_fp16)[name = string("op_579_cast_fp16")];
            tensor<int32, [3]> var_586 = const()[name = string("op_586"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> input_23_axes_0 = const()[name = string("input_23_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 1]> var_587 = transpose(perm = var_586, x = var_579_cast_fp16)[name = string("transpose_21")];
            tensor<fp16, [1, 3072, 1, 1]> input_23 = expand_dims(axes = input_23_axes_0, x = var_587)[name = string("input_23")];
            string input_25_pad_type_0 = const()[name = string("input_25_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> input_25_strides_0 = const()[name = string("input_25_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> input_25_pad_0 = const()[name = string("input_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> input_25_dilations_0 = const()[name = string("input_25_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 input_25_groups_0 = const()[name = string("input_25_groups_0"), val = int32(1)];
            tensor<fp16, [1, 8192, 1, 1]> input_25 = conv(dilations = input_25_dilations_0, groups = input_25_groups_0, pad = input_25_pad_0, pad_type = input_25_pad_type_0, strides = input_25_strides_0, weight = model_model_layers_8_mlp_gate_proj_weight_palettized, x = input_23)[name = string("input_25")];
            string up_states_3_pad_type_0 = const()[name = string("up_states_3_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> up_states_3_strides_0 = const()[name = string("up_states_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> up_states_3_pad_0 = const()[name = string("up_states_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> up_states_3_dilations_0 = const()[name = string("up_states_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 up_states_3_groups_0 = const()[name = string("up_states_3_groups_0"), val = int32(1)];
            tensor<fp16, [1, 8192, 1, 1]> up_states_3 = conv(dilations = up_states_3_dilations_0, groups = up_states_3_groups_0, pad = up_states_3_pad_0, pad_type = up_states_3_pad_type_0, strides = up_states_3_strides_0, weight = model_model_layers_8_mlp_up_proj_weight_palettized, x = input_23)[name = string("up_states_3")];
            tensor<fp16, [1, 8192, 1, 1]> gate_states_3 = silu(x = input_25)[name = string("gate_states_3")];
            tensor<fp16, [1, 8192, 1, 1]> input_27 = mul(x = gate_states_3, y = up_states_3)[name = string("input_27")];
            string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)];
            tensor<fp16, [1, 3072, 1, 1]> hidden_states_15 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = model_model_layers_8_mlp_down_proj_weight_palettized, x = input_27)[name = string("hidden_states_15")];
            tensor<int32, [1]> var_609_axes_0 = const()[name = string("op_609_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 1]> var_609 = squeeze(axes = var_609_axes_0, x = hidden_states_15)[name = string("op_609")];
            tensor<int32, [3]> var_610 = const()[name = string("op_610"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 3072]> var_611 = transpose(perm = var_610, x = var_609)[name = string("transpose_20")];
            tensor<fp16, [1, 1, 3072]> hidden_states_17_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = var_611)[name = string("hidden_states_17_cast_fp16")];
            tensor<int32, [1]> mean_9_axes_0 = const()[name = string("mean_9_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_9_keep_dims_0 = const()[name = string("mean_9_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 1, 1]> mean_9_cast_fp16 = reduce_mean(axes = mean_9_axes_0, keep_dims = mean_9_keep_dims_0, x = hidden_states_17_cast_fp16)[name = string("mean_9_cast_fp16")];
            tensor<fp16, [1, 1, 3072]> input_29_cast_fp16 = sub(x = hidden_states_17_cast_fp16, y = mean_9_cast_fp16)[name = string("input_29_cast_fp16")];
            tensor<int32, [1]> var_619_axes_0 = const()[name = string("op_619_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [3072]> model_model_layers_9_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_input_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736006144)))];
            tensor<fp16, [1, 1, 3072]> var_619_cast_fp16 = layer_norm(axes = var_619_axes_0, epsilon = var_46_to_fp16, gamma = model_model_layers_9_input_layernorm_weight_to_fp16, x = input_29_cast_fp16)[name = string("op_619_cast_fp16")];
            tensor<int32, [3]> var_622 = const()[name = string("op_622"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> var_624_axes_0 = const()[name = string("op_624_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 1]> var_623 = transpose(perm = var_622, x = var_619_cast_fp16)[name = string("transpose_19")];
            tensor<fp16, [1, 3072, 1, 1]> var_624 = expand_dims(axes = var_624_axes_0, x = var_623)[name = string("op_624")];
            string var_631_pad_type_0 = const()[name = string("op_631_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> var_631_strides_0 = const()[name = string("op_631_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> var_631_pad_0 = const()[name = string("op_631_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> var_631_dilations_0 = const()[name = string("op_631_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 var_631_groups_0 = const()[name = string("op_631_groups_0"), val = int32(1)];
            tensor<fp16, [1, 3072, 1, 1]> var_631 = conv(dilations = var_631_dilations_0, groups = var_631_groups_0, pad = var_631_pad_0, pad_type = var_631_pad_type_0, strides = var_631_strides_0, weight = model_model_layers_9_self_attn_q_proj_weight_palettized, x = var_624)[name = string("op_631")];
            tensor<int32, [4]> var_632 = const()[name = string("op_632"), val = tensor<int32, [4]>([1, 24, 1, 128])];
            tensor<fp16, [1, 24, 1, 128]> var_633 = reshape(shape = var_632, x = var_631)[name = string("op_633")];
            string var_640_pad_type_0 = const()[name = string("op_640_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> var_640_strides_0 = const()[name = string("op_640_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> var_640_pad_0 = const()[name = string("op_640_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> var_640_dilations_0 = const()[name = string("op_640_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 var_640_groups_0 = const()[name = string("op_640_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 1]> var_640 = conv(dilations = var_640_dilations_0, groups = var_640_groups_0, pad = var_640_pad_0, pad_type = var_640_pad_type_0, strides = var_640_strides_0, weight = model_model_layers_9_self_attn_k_proj_weight_palettized, x = var_624)[name = string("op_640")];
            tensor<int32, [4]> var_641 = const()[name = string("op_641"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<fp16, [1, 8, 1, 128]> var_642 = reshape(shape = var_641, x = var_640)[name = string("op_642")];
            string var_649_pad_type_0 = const()[name = string("op_649_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> var_649_strides_0 = const()[name = string("op_649_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> var_649_pad_0 = const()[name = string("op_649_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> var_649_dilations_0 = const()[name = string("op_649_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 var_649_groups_0 = const()[name = string("op_649_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 1]> var_649 = conv(dilations = var_649_dilations_0, groups = var_649_groups_0, pad = var_649_pad_0, pad_type = var_649_pad_type_0, strides = var_649_strides_0, weight = model_model_layers_9_self_attn_v_proj_weight_palettized, x = var_624)[name = string("op_649")];
            tensor<int32, [4]> var_650 = const()[name = string("op_650"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<fp16, [1, 8, 1, 128]> var_651 = reshape(shape = var_650, x = var_649)[name = string("op_651")];
            tensor<int32, [4]> x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor<int32, [4]>([1, 24, 1, 64])];
            tensor<bool, [4]> x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 24, 1, 64]> x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = var_633)[name = string("x1_9")];
            tensor<int32, [4]> x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor<int32, [4]>([1, 24, 1, 128])];
            tensor<bool, [4]> x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 24, 1, 64]> x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = var_633)[name = string("x2_9")];
            tensor<fp16, [1, 24, 1, 64]> var_665_cast_fp16 = mul(x = x1_9, y = cos_3_cast_fp16)[name = string("op_665_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_666_cast_fp16 = mul(x = x2_9, y = sin_3_cast_fp16)[name = string("op_666_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_667_cast_fp16 = sub(x = var_665_cast_fp16, y = var_666_cast_fp16)[name = string("op_667_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_668_cast_fp16 = mul(x = x2_9, y = cos_3_cast_fp16)[name = string("op_668_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_669_cast_fp16 = mul(x = x1_9, y = sin_3_cast_fp16)[name = string("op_669_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_670_cast_fp16 = add(x = var_668_cast_fp16, y = var_669_cast_fp16)[name = string("op_670_cast_fp16")];
            bool rotated_9_interleave_0 = const()[name = string("rotated_9_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 24, 1, 128]> rotated_9_cast_fp16 = concat(axis = var_51, interleave = rotated_9_interleave_0, values = (var_667_cast_fp16, var_670_cast_fp16))[name = string("rotated_9_cast_fp16")];
            tensor<int32, [4]> x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor<int32, [4]>([1, 8, 1, 64])];
            tensor<bool, [4]> x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 8, 1, 64]> x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = var_642)[name = string("x1_11")];
            tensor<int32, [4]> x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<bool, [4]> x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 8, 1, 64]> x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = var_642)[name = string("x2_11")];
            tensor<fp16, [1, 8, 1, 64]> var_686_cast_fp16 = mul(x = x1_11, y = cos_3_cast_fp16)[name = string("op_686_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_687_cast_fp16 = mul(x = x2_11, y = sin_3_cast_fp16)[name = string("op_687_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_688_cast_fp16 = sub(x = var_686_cast_fp16, y = var_687_cast_fp16)[name = string("op_688_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_689_cast_fp16 = mul(x = x2_11, y = cos_3_cast_fp16)[name = string("op_689_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_690_cast_fp16 = mul(x = x1_11, y = sin_3_cast_fp16)[name = string("op_690_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_691_cast_fp16 = add(x = var_689_cast_fp16, y = var_690_cast_fp16)[name = string("op_691_cast_fp16")];
            bool rotated_11_interleave_0 = const()[name = string("rotated_11_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 8, 1, 128]> rotated_11_cast_fp16 = concat(axis = var_51, interleave = rotated_11_interleave_0, values = (var_688_cast_fp16, var_691_cast_fp16))[name = string("rotated_11_cast_fp16")];
            tensor<int32, [1]> expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor<int32, [1]>([9])];
            tensor<int32, [1]> expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor<int32, [1]>([10])];
            int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)];
            bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_18")];
            tensor<int32, [1]> concat_19_values1_0 = const()[name = string("concat_19_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_19_values3_0 = const()[name = string("concat_19_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_19_axis_0 = const()[name = string("concat_19_axis_0"), val = int32(0)];
            bool concat_19_interleave_0 = const()[name = string("concat_19_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_19 = concat(axis = concat_19_axis_0, interleave = concat_19_interleave_0, values = (expand_dims_28, concat_19_values1_0, var_326, concat_19_values3_0))[name = string("concat_19")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_18, begin_mask = model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_19, end_mask = model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_5_stride_0, update = rotated_11_cast_fp16, x = coreml_update_state_17)[name = string("model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_4_write_state")];
            tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_18 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_4")];
            tensor<int32, [1]> expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor<int32, [1]>([37])];
            tensor<int32, [1]> expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor<int32, [1]>([38])];
            int32 concat_22_axis_0 = const()[name = string("concat_22_axis_0"), val = int32(0)];
            bool concat_22_interleave_0 = const()[name = string("concat_22_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_22 = concat(axis = concat_22_axis_0, interleave = concat_22_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_22")];
            tensor<int32, [1]> concat_23_values1_0 = const()[name = string("concat_23_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_23_values3_0 = const()[name = string("concat_23_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_23_axis_0 = const()[name = string("concat_23_axis_0"), val = int32(0)];
            bool concat_23_interleave_0 = const()[name = string("concat_23_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_23 = concat(axis = concat_23_axis_0, interleave = concat_23_interleave_0, values = (expand_dims_34, concat_23_values1_0, var_326, concat_23_values3_0))[name = string("concat_23")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_22, begin_mask = model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_23, end_mask = model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_6_stride_0, update = var_651, x = coreml_update_state_18)[name = string("model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_5_write_state")];
            tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_19 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_5")];
            tensor<int32, [4]> var_711_begin_0 = const()[name = string("op_711_begin_0"), val = tensor<int32, [4]>([9, 0, 0, 0])];
            tensor<int32, [4]> var_711_end_0 = const()[name = string("op_711_end_0"), val = tensor<int32, [4]>([10, 8, 1024, 128])];
            tensor<bool, [4]> var_711_end_mask_0 = const()[name = string("op_711_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_711_cast_fp16 = slice_by_index(begin = var_711_begin_0, end = var_711_end_0, end_mask = var_711_end_mask_0, x = coreml_update_state_19)[name = string("op_711_cast_fp16")];
            tensor<int32, [1]> K_layer_cache_5_axes_0 = const()[name = string("K_layer_cache_5_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> K_layer_cache_5_cast_fp16 = squeeze(axes = K_layer_cache_5_axes_0, x = var_711_cast_fp16)[name = string("K_layer_cache_5_cast_fp16")];
            tensor<int32, [4]> var_713_begin_0 = const()[name = string("op_713_begin_0"), val = tensor<int32, [4]>([37, 0, 0, 0])];
            tensor<int32, [4]> var_713_end_0 = const()[name = string("op_713_end_0"), val = tensor<int32, [4]>([38, 8, 1024, 128])];
            tensor<bool, [4]> var_713_end_mask_0 = const()[name = string("op_713_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_713_cast_fp16 = slice_by_index(begin = var_713_begin_0, end = var_713_end_0, end_mask = var_713_end_mask_0, x = coreml_update_state_19)[name = string("op_713_cast_fp16")];
            tensor<int32, [1]> V_layer_cache_5_axes_0 = const()[name = string("V_layer_cache_5_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> V_layer_cache_5_cast_fp16 = squeeze(axes = V_layer_cache_5_axes_0, x = var_713_cast_fp16)[name = string("V_layer_cache_5_cast_fp16")];
            tensor<int32, [1]> x_67_axes_0 = const()[name = string("x_67_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_67_cast_fp16 = expand_dims(axes = x_67_axes_0, x = K_layer_cache_5_cast_fp16)[name = string("x_67_cast_fp16")];
            tensor<int32, [4]> var_722 = const()[name = string("op_722"), val = tensor<int32, [4]>([1, 3, 1, 1])];
            tensor<fp16, [8, 3, 1024, 128]> x_69_cast_fp16 = tile(reps = var_722, x = x_67_cast_fp16)[name = string("x_69_cast_fp16")];
            tensor<int32, [4]> var_726 = const()[name = string("op_726"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
            tensor<fp16, [1, 24, 1024, 128]> key_states_11_cast_fp16 = reshape(shape = var_726, x = x_69_cast_fp16)[name = string("key_states_11_cast_fp16")];
            tensor<int32, [1]> x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = V_layer_cache_5_cast_fp16)[name = string("x_73_cast_fp16")];
            tensor<int32, [4]> var_729 = const()[name = string("op_729"), val = tensor<int32, [4]>([1, 3, 1, 1])];
            tensor<fp16, [8, 3, 1024, 128]> x_75_cast_fp16 = tile(reps = var_729, x = x_73_cast_fp16)[name = string("x_75_cast_fp16")];
            tensor<int32, [4]> var_733 = const()[name = string("op_733"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
            tensor<fp16, [1, 24, 1024, 128]> value_states_11_cast_fp16 = reshape(shape = var_733, x = x_75_cast_fp16)[name = string("value_states_11_cast_fp16")];
            bool var_736_transpose_x_1 = const()[name = string("op_736_transpose_x_1"), val = bool(false)];
            bool var_736_transpose_y_1 = const()[name = string("op_736_transpose_y_1"), val = bool(true)];
            tensor<fp16, [1, 24, 1, 1024]> var_736_cast_fp16 = matmul(transpose_x = var_736_transpose_x_1, transpose_y = var_736_transpose_y_1, x = rotated_9_cast_fp16, y = key_states_11_cast_fp16)[name = string("op_736_cast_fp16")];
            fp16 var_737_to_fp16 = const()[name = string("op_737_to_fp16"), val = fp16(0x1.6ap-4)];
            tensor<fp16, [1, 24, 1, 1024]> attn_weights_9_cast_fp16 = mul(x = var_736_cast_fp16, y = var_737_to_fp16)[name = string("attn_weights_9_cast_fp16")];
            tensor<fp16, [1, 24, 1, 1024]> x_77_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask)[name = string("x_77_cast_fp16")];
            tensor<int32, [1]> reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor<int32, [1]>([-1])];
            bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 24, 1, 1]> reduce_max_2_cast_fp16 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_77_cast_fp16)[name = string("reduce_max_2_cast_fp16")];
            tensor<fp16, [1, 24, 1, 1024]> x_79_cast_fp16 = sub(x = x_77_cast_fp16, y = reduce_max_2_cast_fp16)[name = string("x_79_cast_fp16")];
            tensor<fp16, [1, 24, 1, 1024]> exp_x_5_cast_fp16 = exp(x = x_79_cast_fp16)[name = string("exp_x_5_cast_fp16")];
            tensor<int32, [1]> var_748_axes_0 = const()[name = string("op_748_axes_0"), val = tensor<int32, [1]>([-1])];
            bool var_748_keep_dims_0 = const()[name = string("op_748_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 24, 1, 1]> var_748_cast_fp16 = reduce_sum(axes = var_748_axes_0, keep_dims = var_748_keep_dims_0, x = exp_x_5_cast_fp16)[name = string("op_748_cast_fp16")];
            tensor<fp16, [1, 24, 1, 1024]> attn_weights_11_cast_fp16 = real_div(x = exp_x_5_cast_fp16, y = var_748_cast_fp16)[name = string("attn_weights_11_cast_fp16")];
            bool attn_output_13_transpose_x_0 = const()[name = string("attn_output_13_transpose_x_0"), val = bool(false)];
            bool attn_output_13_transpose_y_0 = const()[name = string("attn_output_13_transpose_y_0"), val = bool(false)];
            tensor<fp16, [1, 24, 1, 128]> attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_0, transpose_y = attn_output_13_transpose_y_0, x = attn_weights_11_cast_fp16, y = value_states_11_cast_fp16)[name = string("attn_output_13_cast_fp16")];
            tensor<int32, [4]> var_751_perm_0 = const()[name = string("op_751_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_753 = const()[name = string("op_753"), val = tensor<int32, [3]>([1, 1, 3072])];
            tensor<fp16, [1, 1, 24, 128]> var_751_cast_fp16 = transpose(perm = var_751_perm_0, x = attn_output_13_cast_fp16)[name = string("transpose_18")];
            tensor<fp16, [1, 1, 3072]> input_33_cast_fp16 = reshape(shape = var_753, x = var_751_cast_fp16)[name = string("input_33_cast_fp16")];
            tensor<fp16, [3072, 3072]> model_model_layers_9_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736012352))), lut = tensor<fp16, [384, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(745449600))))[name = string("model_model_layers_9_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
            tensor<fp16, [1, 1, 3072]> linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_9_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_33_cast_fp16)[name = string("linear_2_cast_fp16")];
            tensor<fp16, [1, 1, 3072]> hidden_states_21_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = linear_2_cast_fp16)[name = string("hidden_states_21_cast_fp16")];
            tensor<int32, [1]> mean_11_axes_0 = const()[name = string("mean_11_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_11_keep_dims_0 = const()[name = string("mean_11_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 1, 1]> mean_11_cast_fp16 = reduce_mean(axes = mean_11_axes_0, keep_dims = mean_11_keep_dims_0, x = hidden_states_21_cast_fp16)[name = string("mean_11_cast_fp16")];
            tensor<fp16, [1, 1, 3072]> input_35_cast_fp16 = sub(x = hidden_states_21_cast_fp16, y = mean_11_cast_fp16)[name = string("input_35_cast_fp16")];
            tensor<int32, [1]> var_764_axes_0 = const()[name = string("op_764_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [3072]> model_model_layers_9_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(745646272)))];
            tensor<fp16, [1, 1, 3072]> var_764_cast_fp16 = layer_norm(axes = var_764_axes_0, epsilon = var_46_to_fp16, gamma = model_model_layers_9_post_attention_layernorm_weight_to_fp16, x = input_35_cast_fp16)[name = string("op_764_cast_fp16")];
            tensor<int32, [3]> var_771 = const()[name = string("op_771"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 1]> var_772 = transpose(perm = var_771, x = var_764_cast_fp16)[name = string("transpose_17")];
            tensor<fp16, [1, 3072, 1, 1]> input_37 = expand_dims(axes = input_37_axes_0, x = var_772)[name = string("input_37")];
            string input_39_pad_type_0 = const()[name = string("input_39_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> input_39_strides_0 = const()[name = string("input_39_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> input_39_pad_0 = const()[name = string("input_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> input_39_dilations_0 = const()[name = string("input_39_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 input_39_groups_0 = const()[name = string("input_39_groups_0"), val = int32(1)];
            tensor<fp16, [1, 8192, 1, 1]> input_39 = conv(dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = model_model_layers_9_mlp_gate_proj_weight_palettized, x = input_37)[name = string("input_39")];
            string up_states_5_pad_type_0 = const()[name = string("up_states_5_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> up_states_5_strides_0 = const()[name = string("up_states_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> up_states_5_pad_0 = const()[name = string("up_states_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> up_states_5_dilations_0 = const()[name = string("up_states_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 up_states_5_groups_0 = const()[name = string("up_states_5_groups_0"), val = int32(1)];
            tensor<fp16, [1, 8192, 1, 1]> up_states_5 = conv(dilations = up_states_5_dilations_0, groups = up_states_5_groups_0, pad = up_states_5_pad_0, pad_type = up_states_5_pad_type_0, strides = up_states_5_strides_0, weight = model_model_layers_9_mlp_up_proj_weight_palettized, x = input_37)[name = string("up_states_5")];
            tensor<fp16, [1, 8192, 1, 1]> gate_states_5 = silu(x = input_39)[name = string("gate_states_5")];
            tensor<fp16, [1, 8192, 1, 1]> input_41 = mul(x = gate_states_5, y = up_states_5)[name = string("input_41")];
            string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)];
            tensor<fp16, [1, 3072, 1, 1]> hidden_states_23 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = model_model_layers_9_mlp_down_proj_weight_palettized, x = input_41)[name = string("hidden_states_23")];
            tensor<int32, [1]> var_794_axes_0 = const()[name = string("op_794_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 1]> var_794 = squeeze(axes = var_794_axes_0, x = hidden_states_23)[name = string("op_794")];
            tensor<int32, [3]> var_795 = const()[name = string("op_795"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 3072]> var_796 = transpose(perm = var_795, x = var_794)[name = string("transpose_16")];
            tensor<fp16, [1, 1, 3072]> hidden_states_25_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = var_796)[name = string("hidden_states_25_cast_fp16")];
            tensor<int32, [1]> mean_13_axes_0 = const()[name = string("mean_13_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_13_keep_dims_0 = const()[name = string("mean_13_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 1, 1]> mean_13_cast_fp16 = reduce_mean(axes = mean_13_axes_0, keep_dims = mean_13_keep_dims_0, x = hidden_states_25_cast_fp16)[name = string("mean_13_cast_fp16")];
            tensor<fp16, [1, 1, 3072]> input_43_cast_fp16 = sub(x = hidden_states_25_cast_fp16, y = mean_13_cast_fp16)[name = string("input_43_cast_fp16")];
            tensor<int32, [1]> var_804_axes_0 = const()[name = string("op_804_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [3072]> model_model_layers_10_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_input_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(745652480)))];
            tensor<fp16, [1, 1, 3072]> var_804_cast_fp16 = layer_norm(axes = var_804_axes_0, epsilon = var_46_to_fp16, gamma = model_model_layers_10_input_layernorm_weight_to_fp16, x = input_43_cast_fp16)[name = string("op_804_cast_fp16")];
            tensor<int32, [3]> var_807 = const()[name = string("op_807"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> var_809_axes_0 = const()[name = string("op_809_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 1]> var_808 = transpose(perm = var_807, x = var_804_cast_fp16)[name = string("transpose_15")];
            tensor<fp16, [1, 3072, 1, 1]> var_809 = expand_dims(axes = var_809_axes_0, x = var_808)[name = string("op_809")];
            string var_816_pad_type_0 = const()[name = string("op_816_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> var_816_strides_0 = const()[name = string("op_816_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> var_816_pad_0 = const()[name = string("op_816_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> var_816_dilations_0 = const()[name = string("op_816_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 var_816_groups_0 = const()[name = string("op_816_groups_0"), val = int32(1)];
            tensor<fp16, [1, 3072, 1, 1]> var_816 = conv(dilations = var_816_dilations_0, groups = var_816_groups_0, pad = var_816_pad_0, pad_type = var_816_pad_type_0, strides = var_816_strides_0, weight = model_model_layers_10_self_attn_q_proj_weight_palettized, x = var_809)[name = string("op_816")];
            tensor<int32, [4]> var_817 = const()[name = string("op_817"), val = tensor<int32, [4]>([1, 24, 1, 128])];
            tensor<fp16, [1, 24, 1, 128]> var_818 = reshape(shape = var_817, x = var_816)[name = string("op_818")];
            string var_825_pad_type_0 = const()[name = string("op_825_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> var_825_strides_0 = const()[name = string("op_825_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> var_825_pad_0 = const()[name = string("op_825_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> var_825_dilations_0 = const()[name = string("op_825_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 var_825_groups_0 = const()[name = string("op_825_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 1]> var_825 = conv(dilations = var_825_dilations_0, groups = var_825_groups_0, pad = var_825_pad_0, pad_type = var_825_pad_type_0, strides = var_825_strides_0, weight = model_model_layers_10_self_attn_k_proj_weight_palettized, x = var_809)[name = string("op_825")];
            tensor<int32, [4]> var_826 = const()[name = string("op_826"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<fp16, [1, 8, 1, 128]> var_827 = reshape(shape = var_826, x = var_825)[name = string("op_827")];
            string var_834_pad_type_0 = const()[name = string("op_834_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> var_834_strides_0 = const()[name = string("op_834_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> var_834_pad_0 = const()[name = string("op_834_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> var_834_dilations_0 = const()[name = string("op_834_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 var_834_groups_0 = const()[name = string("op_834_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 1]> var_834 = conv(dilations = var_834_dilations_0, groups = var_834_groups_0, pad = var_834_pad_0, pad_type = var_834_pad_type_0, strides = var_834_strides_0, weight = model_model_layers_10_self_attn_v_proj_weight_palettized, x = var_809)[name = string("op_834")];
            tensor<int32, [4]> var_835 = const()[name = string("op_835"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<fp16, [1, 8, 1, 128]> var_836 = reshape(shape = var_835, x = var_834)[name = string("op_836")];
            tensor<int32, [4]> x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor<int32, [4]>([1, 24, 1, 64])];
            tensor<bool, [4]> x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 24, 1, 64]> x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = var_818)[name = string("x1_13")];
            tensor<int32, [4]> x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor<int32, [4]>([1, 24, 1, 128])];
            tensor<bool, [4]> x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 24, 1, 64]> x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = var_818)[name = string("x2_13")];
            tensor<fp16, [1, 24, 1, 64]> var_850_cast_fp16 = mul(x = x1_13, y = cos_3_cast_fp16)[name = string("op_850_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_851_cast_fp16 = mul(x = x2_13, y = sin_3_cast_fp16)[name = string("op_851_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_852_cast_fp16 = sub(x = var_850_cast_fp16, y = var_851_cast_fp16)[name = string("op_852_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_853_cast_fp16 = mul(x = x2_13, y = cos_3_cast_fp16)[name = string("op_853_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_854_cast_fp16 = mul(x = x1_13, y = sin_3_cast_fp16)[name = string("op_854_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_855_cast_fp16 = add(x = var_853_cast_fp16, y = var_854_cast_fp16)[name = string("op_855_cast_fp16")];
            bool rotated_13_interleave_0 = const()[name = string("rotated_13_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 24, 1, 128]> rotated_13_cast_fp16 = concat(axis = var_51, interleave = rotated_13_interleave_0, values = (var_852_cast_fp16, var_855_cast_fp16))[name = string("rotated_13_cast_fp16")];
            tensor<int32, [4]> x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor<int32, [4]>([1, 8, 1, 64])];
            tensor<bool, [4]> x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 8, 1, 64]> x1_15 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = var_827)[name = string("x1_15")];
            tensor<int32, [4]> x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<bool, [4]> x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 8, 1, 64]> x2_15 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = var_827)[name = string("x2_15")];
            tensor<fp16, [1, 8, 1, 64]> var_871_cast_fp16 = mul(x = x1_15, y = cos_3_cast_fp16)[name = string("op_871_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_872_cast_fp16 = mul(x = x2_15, y = sin_3_cast_fp16)[name = string("op_872_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_873_cast_fp16 = sub(x = var_871_cast_fp16, y = var_872_cast_fp16)[name = string("op_873_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_874_cast_fp16 = mul(x = x2_15, y = cos_3_cast_fp16)[name = string("op_874_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_875_cast_fp16 = mul(x = x1_15, y = sin_3_cast_fp16)[name = string("op_875_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_876_cast_fp16 = add(x = var_874_cast_fp16, y = var_875_cast_fp16)[name = string("op_876_cast_fp16")];
            bool rotated_15_interleave_0 = const()[name = string("rotated_15_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 8, 1, 128]> rotated_15_cast_fp16 = concat(axis = var_51, interleave = rotated_15_interleave_0, values = (var_873_cast_fp16, var_876_cast_fp16))[name = string("rotated_15_cast_fp16")];
            tensor<int32, [1]> expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor<int32, [1]>([10])];
            tensor<int32, [1]> expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor<int32, [1]>([11])];
            int32 concat_26_axis_0 = const()[name = string("concat_26_axis_0"), val = int32(0)];
            bool concat_26_interleave_0 = const()[name = string("concat_26_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_26 = concat(axis = concat_26_axis_0, interleave = concat_26_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_26")];
            tensor<int32, [1]> concat_27_values1_0 = const()[name = string("concat_27_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_27_values3_0 = const()[name = string("concat_27_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_27_axis_0 = const()[name = string("concat_27_axis_0"), val = int32(0)];
            bool concat_27_interleave_0 = const()[name = string("concat_27_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_27 = concat(axis = concat_27_axis_0, interleave = concat_27_interleave_0, values = (expand_dims_40, concat_27_values1_0, var_326, concat_27_values3_0))[name = string("concat_27")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_26, begin_mask = model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_27, end_mask = model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_7_stride_0, update = rotated_15_cast_fp16, x = coreml_update_state_19)[name = string("model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_6_write_state")];
            tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_20 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_6")];
            tensor<int32, [1]> expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor<int32, [1]>([38])];
            tensor<int32, [1]> expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor<int32, [1]>([39])];
            int32 concat_30_axis_0 = const()[name = string("concat_30_axis_0"), val = int32(0)];
            bool concat_30_interleave_0 = const()[name = string("concat_30_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_30 = concat(axis = concat_30_axis_0, interleave = concat_30_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_30")];
            tensor<int32, [1]> concat_31_values1_0 = const()[name = string("concat_31_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_31_values3_0 = const()[name = string("concat_31_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_31_axis_0 = const()[name = string("concat_31_axis_0"), val = int32(0)];
            bool concat_31_interleave_0 = const()[name = string("concat_31_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_31 = concat(axis = concat_31_axis_0, interleave = concat_31_interleave_0, values = (expand_dims_46, concat_31_values1_0, var_326, concat_31_values3_0))[name = string("concat_31")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_30, begin_mask = model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_31, end_mask = model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_8_stride_0, update = var_836, x = coreml_update_state_20)[name = string("model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_7_write_state")];
            tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_21 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_7")];
            tensor<int32, [4]> var_896_begin_0 = const()[name = string("op_896_begin_0"), val = tensor<int32, [4]>([10, 0, 0, 0])];
            tensor<int32, [4]> var_896_end_0 = const()[name = string("op_896_end_0"), val = tensor<int32, [4]>([11, 8, 1024, 128])];
            tensor<bool, [4]> var_896_end_mask_0 = const()[name = string("op_896_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_896_cast_fp16 = slice_by_index(begin = var_896_begin_0, end = var_896_end_0, end_mask = var_896_end_mask_0, x = coreml_update_state_21)[name = string("op_896_cast_fp16")];
            tensor<int32, [1]> K_layer_cache_7_axes_0 = const()[name = string("K_layer_cache_7_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> K_layer_cache_7_cast_fp16 = squeeze(axes = K_layer_cache_7_axes_0, x = var_896_cast_fp16)[name = string("K_layer_cache_7_cast_fp16")];
            tensor<int32, [4]> var_898_begin_0 = const()[name = string("op_898_begin_0"), val = tensor<int32, [4]>([38, 0, 0, 0])];
            tensor<int32, [4]> var_898_end_0 = const()[name = string("op_898_end_0"), val = tensor<int32, [4]>([39, 8, 1024, 128])];
            tensor<bool, [4]> var_898_end_mask_0 = const()[name = string("op_898_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_898_cast_fp16 = slice_by_index(begin = var_898_begin_0, end = var_898_end_0, end_mask = var_898_end_mask_0, x = coreml_update_state_21)[name = string("op_898_cast_fp16")];
            tensor<int32, [1]> V_layer_cache_7_axes_0 = const()[name = string("V_layer_cache_7_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> V_layer_cache_7_cast_fp16 = squeeze(axes = V_layer_cache_7_axes_0, x = var_898_cast_fp16)[name = string("V_layer_cache_7_cast_fp16")];
            tensor<int32, [1]> x_95_axes_0 = const()[name = string("x_95_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_95_cast_fp16 = expand_dims(axes = x_95_axes_0, x = K_layer_cache_7_cast_fp16)[name = string("x_95_cast_fp16")];
            tensor<int32, [4]> var_907 = const()[name = string("op_907"), val = tensor<int32, [4]>([1, 3, 1, 1])];
            tensor<fp16, [8, 3, 1024, 128]> x_97_cast_fp16 = tile(reps = var_907, x = x_95_cast_fp16)[name = string("x_97_cast_fp16")];
            tensor<int32, [4]> var_911 = const()[name = string("op_911"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
            tensor<fp16, [1, 24, 1024, 128]> key_states_15_cast_fp16 = reshape(shape = var_911, x = x_97_cast_fp16)[name = string("key_states_15_cast_fp16")];
            tensor<int32, [1]> x_101_axes_0 = const()[name = string("x_101_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_101_cast_fp16 = expand_dims(axes = x_101_axes_0, x = V_layer_cache_7_cast_fp16)[name = string("x_101_cast_fp16")];
            tensor<int32, [4]> var_914 = const()[name = string("op_914"), val = tensor<int32, [4]>([1, 3, 1, 1])];
            tensor<fp16, [8, 3, 1024, 128]> x_103_cast_fp16 = tile(reps = var_914, x = x_101_cast_fp16)[name = string("x_103_cast_fp16")];
            tensor<int32, [4]> var_918 = const()[name = string("op_918"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
            tensor<fp16, [1, 24, 1024, 128]> value_states_15_cast_fp16 = reshape(shape = var_918, x = x_103_cast_fp16)[name = string("value_states_15_cast_fp16")];
            bool var_921_transpose_x_1 = const()[name = string("op_921_transpose_x_1"), val = bool(false)];
            bool var_921_transpose_y_1 = const()[name = string("op_921_transpose_y_1"), val = bool(true)];
            tensor<fp16, [1, 24, 1, 1024]> var_921_cast_fp16 = matmul(transpose_x = var_921_transpose_x_1, transpose_y = var_921_transpose_y_1, x = rotated_13_cast_fp16, y = key_states_15_cast_fp16)[name = string("op_921_cast_fp16")];
            fp16 var_922_to_fp16 = const()[name = string("op_922_to_fp16"), val = fp16(0x1.6ap-4)];
            tensor<fp16, [1, 24, 1, 1024]> attn_weights_13_cast_fp16 = mul(x = var_921_cast_fp16, y = var_922_to_fp16)[name = string("attn_weights_13_cast_fp16")];
            tensor<fp16, [1, 24, 1, 1024]> x_105_cast_fp16 = add(x = attn_weights_13_cast_fp16, y = causal_mask)[name = string("x_105_cast_fp16")];
            tensor<int32, [1]> reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor<int32, [1]>([-1])];
            bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 24, 1, 1]> reduce_max_3_cast_fp16 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_105_cast_fp16)[name = string("reduce_max_3_cast_fp16")];
            tensor<fp16, [1, 24, 1, 1024]> x_107_cast_fp16 = sub(x = x_105_cast_fp16, y = reduce_max_3_cast_fp16)[name = string("x_107_cast_fp16")];
            tensor<fp16, [1, 24, 1, 1024]> exp_x_7_cast_fp16 = exp(x = x_107_cast_fp16)[name = string("exp_x_7_cast_fp16")];
            tensor<int32, [1]> var_933_axes_0 = const()[name = string("op_933_axes_0"), val = tensor<int32, [1]>([-1])];
            bool var_933_keep_dims_0 = const()[name = string("op_933_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 24, 1, 1]> var_933_cast_fp16 = reduce_sum(axes = var_933_axes_0, keep_dims = var_933_keep_dims_0, x = exp_x_7_cast_fp16)[name = string("op_933_cast_fp16")];
            tensor<fp16, [1, 24, 1, 1024]> attn_weights_15_cast_fp16 = real_div(x = exp_x_7_cast_fp16, y = var_933_cast_fp16)[name = string("attn_weights_15_cast_fp16")];
            bool attn_output_19_transpose_x_0 = const()[name = string("attn_output_19_transpose_x_0"), val = bool(false)];
            bool attn_output_19_transpose_y_0 = const()[name = string("attn_output_19_transpose_y_0"), val = bool(false)];
            tensor<fp16, [1, 24, 1, 128]> attn_output_19_cast_fp16 = matmul(transpose_x = attn_output_19_transpose_x_0, transpose_y = attn_output_19_transpose_y_0, x = attn_weights_15_cast_fp16, y = value_states_15_cast_fp16)[name = string("attn_output_19_cast_fp16")];
            tensor<int32, [4]> var_936_perm_0 = const()[name = string("op_936_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_938 = const()[name = string("op_938"), val = tensor<int32, [3]>([1, 1, 3072])];
            tensor<fp16, [1, 1, 24, 128]> var_936_cast_fp16 = transpose(perm = var_936_perm_0, x = attn_output_19_cast_fp16)[name = string("transpose_14")];
            tensor<fp16, [1, 1, 3072]> input_47_cast_fp16 = reshape(shape = var_938, x = var_936_cast_fp16)[name = string("input_47_cast_fp16")];
            tensor<fp16, [3072, 3072]> model_model_layers_10_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(745658688))), lut = tensor<fp16, [384, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755095936))))[name = string("model_model_layers_10_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
            tensor<fp16, [1, 1, 3072]> linear_3_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_10_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_47_cast_fp16)[name = string("linear_3_cast_fp16")];
            tensor<fp16, [1, 1, 3072]> hidden_states_29_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = linear_3_cast_fp16)[name = string("hidden_states_29_cast_fp16")];
            tensor<int32, [1]> mean_15_axes_0 = const()[name = string("mean_15_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_15_keep_dims_0 = const()[name = string("mean_15_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 1, 1]> mean_15_cast_fp16 = reduce_mean(axes = mean_15_axes_0, keep_dims = mean_15_keep_dims_0, x = hidden_states_29_cast_fp16)[name = string("mean_15_cast_fp16")];
            tensor<fp16, [1, 1, 3072]> input_49_cast_fp16 = sub(x = hidden_states_29_cast_fp16, y = mean_15_cast_fp16)[name = string("input_49_cast_fp16")];
            tensor<int32, [1]> var_949_axes_0 = const()[name = string("op_949_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [3072]> model_model_layers_10_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755292608)))];
            tensor<fp16, [1, 1, 3072]> var_949_cast_fp16 = layer_norm(axes = var_949_axes_0, epsilon = var_46_to_fp16, gamma = model_model_layers_10_post_attention_layernorm_weight_to_fp16, x = input_49_cast_fp16)[name = string("op_949_cast_fp16")];
            tensor<int32, [3]> var_956 = const()[name = string("op_956"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> input_51_axes_0 = const()[name = string("input_51_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 1]> var_957 = transpose(perm = var_956, x = var_949_cast_fp16)[name = string("transpose_13")];
            tensor<fp16, [1, 3072, 1, 1]> input_51 = expand_dims(axes = input_51_axes_0, x = var_957)[name = string("input_51")];
            string input_53_pad_type_0 = const()[name = string("input_53_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> input_53_strides_0 = const()[name = string("input_53_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> input_53_pad_0 = const()[name = string("input_53_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> input_53_dilations_0 = const()[name = string("input_53_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 input_53_groups_0 = const()[name = string("input_53_groups_0"), val = int32(1)];
            tensor<fp16, [1, 8192, 1, 1]> input_53 = conv(dilations = input_53_dilations_0, groups = input_53_groups_0, pad = input_53_pad_0, pad_type = input_53_pad_type_0, strides = input_53_strides_0, weight = model_model_layers_10_mlp_gate_proj_weight_palettized, x = input_51)[name = string("input_53")];
            string up_states_7_pad_type_0 = const()[name = string("up_states_7_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> up_states_7_strides_0 = const()[name = string("up_states_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> up_states_7_pad_0 = const()[name = string("up_states_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> up_states_7_dilations_0 = const()[name = string("up_states_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 up_states_7_groups_0 = const()[name = string("up_states_7_groups_0"), val = int32(1)];
            tensor<fp16, [1, 8192, 1, 1]> up_states_7 = conv(dilations = up_states_7_dilations_0, groups = up_states_7_groups_0, pad = up_states_7_pad_0, pad_type = up_states_7_pad_type_0, strides = up_states_7_strides_0, weight = model_model_layers_10_mlp_up_proj_weight_palettized, x = input_51)[name = string("up_states_7")];
            tensor<fp16, [1, 8192, 1, 1]> gate_states_7 = silu(x = input_53)[name = string("gate_states_7")];
            tensor<fp16, [1, 8192, 1, 1]> input_55 = mul(x = gate_states_7, y = up_states_7)[name = string("input_55")];
            string hidden_states_31_pad_type_0 = const()[name = string("hidden_states_31_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> hidden_states_31_strides_0 = const()[name = string("hidden_states_31_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> hidden_states_31_pad_0 = const()[name = string("hidden_states_31_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> hidden_states_31_dilations_0 = const()[name = string("hidden_states_31_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 hidden_states_31_groups_0 = const()[name = string("hidden_states_31_groups_0"), val = int32(1)];
            tensor<fp16, [1, 3072, 1, 1]> hidden_states_31 = conv(dilations = hidden_states_31_dilations_0, groups = hidden_states_31_groups_0, pad = hidden_states_31_pad_0, pad_type = hidden_states_31_pad_type_0, strides = hidden_states_31_strides_0, weight = model_model_layers_10_mlp_down_proj_weight_palettized, x = input_55)[name = string("hidden_states_31")];
            tensor<int32, [1]> var_979_axes_0 = const()[name = string("op_979_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 1]> var_979 = squeeze(axes = var_979_axes_0, x = hidden_states_31)[name = string("op_979")];
            tensor<int32, [3]> var_980 = const()[name = string("op_980"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 3072]> var_981 = transpose(perm = var_980, x = var_979)[name = string("transpose_12")];
            tensor<fp16, [1, 1, 3072]> hidden_states_33_cast_fp16 = add(x = hidden_states_29_cast_fp16, y = var_981)[name = string("hidden_states_33_cast_fp16")];
            tensor<int32, [1]> mean_17_axes_0 = const()[name = string("mean_17_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_17_keep_dims_0 = const()[name = string("mean_17_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 1, 1]> mean_17_cast_fp16 = reduce_mean(axes = mean_17_axes_0, keep_dims = mean_17_keep_dims_0, x = hidden_states_33_cast_fp16)[name = string("mean_17_cast_fp16")];
            tensor<fp16, [1, 1, 3072]> input_57_cast_fp16 = sub(x = hidden_states_33_cast_fp16, y = mean_17_cast_fp16)[name = string("input_57_cast_fp16")];
            tensor<int32, [1]> var_989_axes_0 = const()[name = string("op_989_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [3072]> model_model_layers_11_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_input_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755298816)))];
            tensor<fp16, [1, 1, 3072]> var_989_cast_fp16 = layer_norm(axes = var_989_axes_0, epsilon = var_46_to_fp16, gamma = model_model_layers_11_input_layernorm_weight_to_fp16, x = input_57_cast_fp16)[name = string("op_989_cast_fp16")];
            tensor<int32, [3]> var_992 = const()[name = string("op_992"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> var_994_axes_0 = const()[name = string("op_994_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 1]> var_993 = transpose(perm = var_992, x = var_989_cast_fp16)[name = string("transpose_11")];
            tensor<fp16, [1, 3072, 1, 1]> var_994 = expand_dims(axes = var_994_axes_0, x = var_993)[name = string("op_994")];
            string var_1001_pad_type_0 = const()[name = string("op_1001_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> var_1001_strides_0 = const()[name = string("op_1001_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> var_1001_pad_0 = const()[name = string("op_1001_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> var_1001_dilations_0 = const()[name = string("op_1001_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 var_1001_groups_0 = const()[name = string("op_1001_groups_0"), val = int32(1)];
            tensor<fp16, [1, 3072, 1, 1]> var_1001 = conv(dilations = var_1001_dilations_0, groups = var_1001_groups_0, pad = var_1001_pad_0, pad_type = var_1001_pad_type_0, strides = var_1001_strides_0, weight = model_model_layers_11_self_attn_q_proj_weight_palettized, x = var_994)[name = string("op_1001")];
            tensor<int32, [4]> var_1002 = const()[name = string("op_1002"), val = tensor<int32, [4]>([1, 24, 1, 128])];
            tensor<fp16, [1, 24, 1, 128]> var_1003 = reshape(shape = var_1002, x = var_1001)[name = string("op_1003")];
            string var_1010_pad_type_0 = const()[name = string("op_1010_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> var_1010_strides_0 = const()[name = string("op_1010_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> var_1010_pad_0 = const()[name = string("op_1010_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> var_1010_dilations_0 = const()[name = string("op_1010_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 var_1010_groups_0 = const()[name = string("op_1010_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 1]> var_1010 = conv(dilations = var_1010_dilations_0, groups = var_1010_groups_0, pad = var_1010_pad_0, pad_type = var_1010_pad_type_0, strides = var_1010_strides_0, weight = model_model_layers_11_self_attn_k_proj_weight_palettized, x = var_994)[name = string("op_1010")];
            tensor<int32, [4]> var_1011 = const()[name = string("op_1011"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<fp16, [1, 8, 1, 128]> var_1012 = reshape(shape = var_1011, x = var_1010)[name = string("op_1012")];
            string var_1019_pad_type_0 = const()[name = string("op_1019_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> var_1019_strides_0 = const()[name = string("op_1019_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> var_1019_pad_0 = const()[name = string("op_1019_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> var_1019_dilations_0 = const()[name = string("op_1019_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 var_1019_groups_0 = const()[name = string("op_1019_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 1]> var_1019 = conv(dilations = var_1019_dilations_0, groups = var_1019_groups_0, pad = var_1019_pad_0, pad_type = var_1019_pad_type_0, strides = var_1019_strides_0, weight = model_model_layers_11_self_attn_v_proj_weight_palettized, x = var_994)[name = string("op_1019")];
            tensor<int32, [4]> var_1020 = const()[name = string("op_1020"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<fp16, [1, 8, 1, 128]> var_1021 = reshape(shape = var_1020, x = var_1019)[name = string("op_1021")];
            tensor<int32, [4]> x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor<int32, [4]>([1, 24, 1, 64])];
            tensor<bool, [4]> x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 24, 1, 64]> x1_17 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = var_1003)[name = string("x1_17")];
            tensor<int32, [4]> x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor<int32, [4]>([1, 24, 1, 128])];
            tensor<bool, [4]> x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 24, 1, 64]> x2_17 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = var_1003)[name = string("x2_17")];
            tensor<fp16, [1, 24, 1, 64]> var_1035_cast_fp16 = mul(x = x1_17, y = cos_3_cast_fp16)[name = string("op_1035_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_1036_cast_fp16 = mul(x = x2_17, y = sin_3_cast_fp16)[name = string("op_1036_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_1037_cast_fp16 = sub(x = var_1035_cast_fp16, y = var_1036_cast_fp16)[name = string("op_1037_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_1038_cast_fp16 = mul(x = x2_17, y = cos_3_cast_fp16)[name = string("op_1038_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_1039_cast_fp16 = mul(x = x1_17, y = sin_3_cast_fp16)[name = string("op_1039_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_1040_cast_fp16 = add(x = var_1038_cast_fp16, y = var_1039_cast_fp16)[name = string("op_1040_cast_fp16")];
            bool rotated_17_interleave_0 = const()[name = string("rotated_17_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 24, 1, 128]> rotated_17_cast_fp16 = concat(axis = var_51, interleave = rotated_17_interleave_0, values = (var_1037_cast_fp16, var_1040_cast_fp16))[name = string("rotated_17_cast_fp16")];
            tensor<int32, [4]> x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor<int32, [4]>([1, 8, 1, 64])];
            tensor<bool, [4]> x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 8, 1, 64]> x1_19 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = var_1012)[name = string("x1_19")];
            tensor<int32, [4]> x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<bool, [4]> x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 8, 1, 64]> x2_19 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = var_1012)[name = string("x2_19")];
            tensor<fp16, [1, 8, 1, 64]> var_1056_cast_fp16 = mul(x = x1_19, y = cos_3_cast_fp16)[name = string("op_1056_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_1057_cast_fp16 = mul(x = x2_19, y = sin_3_cast_fp16)[name = string("op_1057_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_1058_cast_fp16 = sub(x = var_1056_cast_fp16, y = var_1057_cast_fp16)[name = string("op_1058_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_1059_cast_fp16 = mul(x = x2_19, y = cos_3_cast_fp16)[name = string("op_1059_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_1060_cast_fp16 = mul(x = x1_19, y = sin_3_cast_fp16)[name = string("op_1060_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_1061_cast_fp16 = add(x = var_1059_cast_fp16, y = var_1060_cast_fp16)[name = string("op_1061_cast_fp16")];
            bool rotated_19_interleave_0 = const()[name = string("rotated_19_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 8, 1, 128]> rotated_19_cast_fp16 = concat(axis = var_51, interleave = rotated_19_interleave_0, values = (var_1058_cast_fp16, var_1061_cast_fp16))[name = string("rotated_19_cast_fp16")];
            tensor<int32, [1]> expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor<int32, [1]>([11])];
            tensor<int32, [1]> expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor<int32, [1]>([12])];
            int32 concat_34_axis_0 = const()[name = string("concat_34_axis_0"), val = int32(0)];
            bool concat_34_interleave_0 = const()[name = string("concat_34_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_34 = concat(axis = concat_34_axis_0, interleave = concat_34_interleave_0, values = (expand_dims_48, expand_dims_49, current_pos, expand_dims_51))[name = string("concat_34")];
            tensor<int32, [1]> concat_35_values1_0 = const()[name = string("concat_35_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_35_values3_0 = const()[name = string("concat_35_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_35_axis_0 = const()[name = string("concat_35_axis_0"), val = int32(0)];
            bool concat_35_interleave_0 = const()[name = string("concat_35_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_35 = concat(axis = concat_35_axis_0, interleave = concat_35_interleave_0, values = (expand_dims_52, concat_35_values1_0, var_326, concat_35_values3_0))[name = string("concat_35")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_34, begin_mask = model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0, end = concat_35, end_mask = model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_9_stride_0, update = rotated_19_cast_fp16, x = coreml_update_state_21)[name = string("model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_8_write_state")];
            tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_22 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_8")];
            tensor<int32, [1]> expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor<int32, [1]>([39])];
            tensor<int32, [1]> expand_dims_55 = const()[name = string("expand_dims_55"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_58 = const()[name = string("expand_dims_58"), val = tensor<int32, [1]>([40])];
            int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)];
            bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_54, expand_dims_55, current_pos, expand_dims_57))[name = string("concat_38")];
            tensor<int32, [1]> concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)];
            bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_58, concat_39_values1_0, var_326, concat_39_values3_0))[name = string("concat_39")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_10_stride_0, update = var_1021, x = coreml_update_state_22)[name = string("model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_9_write_state")];
            tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_23 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_9")];
            tensor<int32, [4]> var_1081_begin_0 = const()[name = string("op_1081_begin_0"), val = tensor<int32, [4]>([11, 0, 0, 0])];
            tensor<int32, [4]> var_1081_end_0 = const()[name = string("op_1081_end_0"), val = tensor<int32, [4]>([12, 8, 1024, 128])];
            tensor<bool, [4]> var_1081_end_mask_0 = const()[name = string("op_1081_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_1081_cast_fp16 = slice_by_index(begin = var_1081_begin_0, end = var_1081_end_0, end_mask = var_1081_end_mask_0, x = coreml_update_state_23)[name = string("op_1081_cast_fp16")];
            tensor<int32, [1]> K_layer_cache_9_axes_0 = const()[name = string("K_layer_cache_9_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> K_layer_cache_9_cast_fp16 = squeeze(axes = K_layer_cache_9_axes_0, x = var_1081_cast_fp16)[name = string("K_layer_cache_9_cast_fp16")];
            tensor<int32, [4]> var_1083_begin_0 = const()[name = string("op_1083_begin_0"), val = tensor<int32, [4]>([39, 0, 0, 0])];
            tensor<int32, [4]> var_1083_end_0 = const()[name = string("op_1083_end_0"), val = tensor<int32, [4]>([40, 8, 1024, 128])];
            tensor<bool, [4]> var_1083_end_mask_0 = const()[name = string("op_1083_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_1083_cast_fp16 = slice_by_index(begin = var_1083_begin_0, end = var_1083_end_0, end_mask = var_1083_end_mask_0, x = coreml_update_state_23)[name = string("op_1083_cast_fp16")];
            tensor<int32, [1]> V_layer_cache_9_axes_0 = const()[name = string("V_layer_cache_9_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> V_layer_cache_9_cast_fp16 = squeeze(axes = V_layer_cache_9_axes_0, x = var_1083_cast_fp16)[name = string("V_layer_cache_9_cast_fp16")];
            tensor<int32, [1]> x_123_axes_0 = const()[name = string("x_123_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_123_cast_fp16 = expand_dims(axes = x_123_axes_0, x = K_layer_cache_9_cast_fp16)[name = string("x_123_cast_fp16")];
            tensor<int32, [4]> var_1092 = const()[name = string("op_1092"), val = tensor<int32, [4]>([1, 3, 1, 1])];
            tensor<fp16, [8, 3, 1024, 128]> x_125_cast_fp16 = tile(reps = var_1092, x = x_123_cast_fp16)[name = string("x_125_cast_fp16")];
            tensor<int32, [4]> var_1096 = const()[name = string("op_1096"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
            tensor<fp16, [1, 24, 1024, 128]> key_states_19_cast_fp16 = reshape(shape = var_1096, x = x_125_cast_fp16)[name = string("key_states_19_cast_fp16")];
            tensor<int32, [1]> x_129_axes_0 = const()[name = string("x_129_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_129_cast_fp16 = expand_dims(axes = x_129_axes_0, x = V_layer_cache_9_cast_fp16)[name = string("x_129_cast_fp16")];
            tensor<int32, [4]> var_1099 = const()[name = string("op_1099"), val = tensor<int32, [4]>([1, 3, 1, 1])];
            tensor<fp16, [8, 3, 1024, 128]> x_131_cast_fp16 = tile(reps = var_1099, x = x_129_cast_fp16)[name = string("x_131_cast_fp16")];
            tensor<int32, [4]> var_1103 = const()[name = string("op_1103"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
            tensor<fp16, [1, 24, 1024, 128]> value_states_19_cast_fp16 = reshape(shape = var_1103, x = x_131_cast_fp16)[name = string("value_states_19_cast_fp16")];
            bool var_1106_transpose_x_1 = const()[name = string("op_1106_transpose_x_1"), val = bool(false)];
            bool var_1106_transpose_y_1 = const()[name = string("op_1106_transpose_y_1"), val = bool(true)];
            tensor<fp16, [1, 24, 1, 1024]> var_1106_cast_fp16 = matmul(transpose_x = var_1106_transpose_x_1, transpose_y = var_1106_transpose_y_1, x = rotated_17_cast_fp16, y = key_states_19_cast_fp16)[name = string("op_1106_cast_fp16")];
            fp16 var_1107_to_fp16 = const()[name = string("op_1107_to_fp16"), val = fp16(0x1.6ap-4)];
            tensor<fp16, [1, 24, 1, 1024]> attn_weights_17_cast_fp16 = mul(x = var_1106_cast_fp16, y = var_1107_to_fp16)[name = string("attn_weights_17_cast_fp16")];
            tensor<fp16, [1, 24, 1, 1024]> x_133_cast_fp16 = add(x = attn_weights_17_cast_fp16, y = causal_mask)[name = string("x_133_cast_fp16")];
            tensor<int32, [1]> reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor<int32, [1]>([-1])];
            bool reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 24, 1, 1]> reduce_max_4_cast_fp16 = reduce_max(axes = reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = x_133_cast_fp16)[name = string("reduce_max_4_cast_fp16")];
            tensor<fp16, [1, 24, 1, 1024]> x_135_cast_fp16 = sub(x = x_133_cast_fp16, y = reduce_max_4_cast_fp16)[name = string("x_135_cast_fp16")];
            tensor<fp16, [1, 24, 1, 1024]> exp_x_9_cast_fp16 = exp(x = x_135_cast_fp16)[name = string("exp_x_9_cast_fp16")];
            tensor<int32, [1]> var_1118_axes_0 = const()[name = string("op_1118_axes_0"), val = tensor<int32, [1]>([-1])];
            bool var_1118_keep_dims_0 = const()[name = string("op_1118_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 24, 1, 1]> var_1118_cast_fp16 = reduce_sum(axes = var_1118_axes_0, keep_dims = var_1118_keep_dims_0, x = exp_x_9_cast_fp16)[name = string("op_1118_cast_fp16")];
            tensor<fp16, [1, 24, 1, 1024]> attn_weights_19_cast_fp16 = real_div(x = exp_x_9_cast_fp16, y = var_1118_cast_fp16)[name = string("attn_weights_19_cast_fp16")];
            bool attn_output_25_transpose_x_0 = const()[name = string("attn_output_25_transpose_x_0"), val = bool(false)];
            bool attn_output_25_transpose_y_0 = const()[name = string("attn_output_25_transpose_y_0"), val = bool(false)];
            tensor<fp16, [1, 24, 1, 128]> attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_0, transpose_y = attn_output_25_transpose_y_0, x = attn_weights_19_cast_fp16, y = value_states_19_cast_fp16)[name = string("attn_output_25_cast_fp16")];
            tensor<int32, [4]> var_1121_perm_0 = const()[name = string("op_1121_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_1123 = const()[name = string("op_1123"), val = tensor<int32, [3]>([1, 1, 3072])];
            tensor<fp16, [1, 1, 24, 128]> var_1121_cast_fp16 = transpose(perm = var_1121_perm_0, x = attn_output_25_cast_fp16)[name = string("transpose_10")];
            tensor<fp16, [1, 1, 3072]> input_61_cast_fp16 = reshape(shape = var_1123, x = var_1121_cast_fp16)[name = string("input_61_cast_fp16")];
            tensor<fp16, [3072, 3072]> model_model_layers_11_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755305024))), lut = tensor<fp16, [384, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(764742272))))[name = string("model_model_layers_11_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
            tensor<fp16, [1, 1, 3072]> linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_11_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_61_cast_fp16)[name = string("linear_4_cast_fp16")];
            tensor<fp16, [1, 1, 3072]> hidden_states_37_cast_fp16 = add(x = hidden_states_33_cast_fp16, y = linear_4_cast_fp16)[name = string("hidden_states_37_cast_fp16")];
            tensor<int32, [1]> mean_19_axes_0 = const()[name = string("mean_19_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_19_keep_dims_0 = const()[name = string("mean_19_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 1, 1]> mean_19_cast_fp16 = reduce_mean(axes = mean_19_axes_0, keep_dims = mean_19_keep_dims_0, x = hidden_states_37_cast_fp16)[name = string("mean_19_cast_fp16")];
            tensor<fp16, [1, 1, 3072]> input_63_cast_fp16 = sub(x = hidden_states_37_cast_fp16, y = mean_19_cast_fp16)[name = string("input_63_cast_fp16")];
            tensor<int32, [1]> var_1134_axes_0 = const()[name = string("op_1134_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [3072]> model_model_layers_11_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(764938944)))];
            tensor<fp16, [1, 1, 3072]> var_1134_cast_fp16 = layer_norm(axes = var_1134_axes_0, epsilon = var_46_to_fp16, gamma = model_model_layers_11_post_attention_layernorm_weight_to_fp16, x = input_63_cast_fp16)[name = string("op_1134_cast_fp16")];
            tensor<int32, [3]> var_1141 = const()[name = string("op_1141"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> input_65_axes_0 = const()[name = string("input_65_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 1]> var_1142 = transpose(perm = var_1141, x = var_1134_cast_fp16)[name = string("transpose_9")];
            tensor<fp16, [1, 3072, 1, 1]> input_65 = expand_dims(axes = input_65_axes_0, x = var_1142)[name = string("input_65")];
            string input_67_pad_type_0 = const()[name = string("input_67_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> input_67_strides_0 = const()[name = string("input_67_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> input_67_pad_0 = const()[name = string("input_67_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> input_67_dilations_0 = const()[name = string("input_67_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 input_67_groups_0 = const()[name = string("input_67_groups_0"), val = int32(1)];
            tensor<fp16, [1, 8192, 1, 1]> input_67 = conv(dilations = input_67_dilations_0, groups = input_67_groups_0, pad = input_67_pad_0, pad_type = input_67_pad_type_0, strides = input_67_strides_0, weight = model_model_layers_11_mlp_gate_proj_weight_palettized, x = input_65)[name = string("input_67")];
            string up_states_9_pad_type_0 = const()[name = string("up_states_9_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> up_states_9_strides_0 = const()[name = string("up_states_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> up_states_9_pad_0 = const()[name = string("up_states_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> up_states_9_dilations_0 = const()[name = string("up_states_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 up_states_9_groups_0 = const()[name = string("up_states_9_groups_0"), val = int32(1)];
            tensor<fp16, [1, 8192, 1, 1]> up_states_9 = conv(dilations = up_states_9_dilations_0, groups = up_states_9_groups_0, pad = up_states_9_pad_0, pad_type = up_states_9_pad_type_0, strides = up_states_9_strides_0, weight = model_model_layers_11_mlp_up_proj_weight_palettized, x = input_65)[name = string("up_states_9")];
            tensor<fp16, [1, 8192, 1, 1]> gate_states_9 = silu(x = input_67)[name = string("gate_states_9")];
            tensor<fp16, [1, 8192, 1, 1]> input_69 = mul(x = gate_states_9, y = up_states_9)[name = string("input_69")];
            string hidden_states_39_pad_type_0 = const()[name = string("hidden_states_39_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> hidden_states_39_strides_0 = const()[name = string("hidden_states_39_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> hidden_states_39_pad_0 = const()[name = string("hidden_states_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> hidden_states_39_dilations_0 = const()[name = string("hidden_states_39_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 hidden_states_39_groups_0 = const()[name = string("hidden_states_39_groups_0"), val = int32(1)];
            tensor<fp16, [1, 3072, 1, 1]> hidden_states_39 = conv(dilations = hidden_states_39_dilations_0, groups = hidden_states_39_groups_0, pad = hidden_states_39_pad_0, pad_type = hidden_states_39_pad_type_0, strides = hidden_states_39_strides_0, weight = model_model_layers_11_mlp_down_proj_weight_palettized, x = input_69)[name = string("hidden_states_39")];
            tensor<int32, [1]> var_1164_axes_0 = const()[name = string("op_1164_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 1]> var_1164 = squeeze(axes = var_1164_axes_0, x = hidden_states_39)[name = string("op_1164")];
            tensor<int32, [3]> var_1165 = const()[name = string("op_1165"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 3072]> var_1166 = transpose(perm = var_1165, x = var_1164)[name = string("transpose_8")];
            tensor<fp16, [1, 1, 3072]> hidden_states_41_cast_fp16 = add(x = hidden_states_37_cast_fp16, y = var_1166)[name = string("hidden_states_41_cast_fp16")];
            tensor<int32, [1]> mean_21_axes_0 = const()[name = string("mean_21_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_21_keep_dims_0 = const()[name = string("mean_21_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 1, 1]> mean_21_cast_fp16 = reduce_mean(axes = mean_21_axes_0, keep_dims = mean_21_keep_dims_0, x = hidden_states_41_cast_fp16)[name = string("mean_21_cast_fp16")];
            tensor<fp16, [1, 1, 3072]> input_71_cast_fp16 = sub(x = hidden_states_41_cast_fp16, y = mean_21_cast_fp16)[name = string("input_71_cast_fp16")];
            tensor<int32, [1]> var_1174_axes_0 = const()[name = string("op_1174_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [3072]> model_model_layers_12_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_input_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(764945152)))];
            tensor<fp16, [1, 1, 3072]> var_1174_cast_fp16 = layer_norm(axes = var_1174_axes_0, epsilon = var_46_to_fp16, gamma = model_model_layers_12_input_layernorm_weight_to_fp16, x = input_71_cast_fp16)[name = string("op_1174_cast_fp16")];
            tensor<int32, [3]> var_1177 = const()[name = string("op_1177"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> var_1179_axes_0 = const()[name = string("op_1179_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 1]> var_1178 = transpose(perm = var_1177, x = var_1174_cast_fp16)[name = string("transpose_7")];
            tensor<fp16, [1, 3072, 1, 1]> var_1179 = expand_dims(axes = var_1179_axes_0, x = var_1178)[name = string("op_1179")];
            string var_1186_pad_type_0 = const()[name = string("op_1186_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> var_1186_strides_0 = const()[name = string("op_1186_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> var_1186_pad_0 = const()[name = string("op_1186_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> var_1186_dilations_0 = const()[name = string("op_1186_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 var_1186_groups_0 = const()[name = string("op_1186_groups_0"), val = int32(1)];
            tensor<fp16, [1, 3072, 1, 1]> var_1186 = conv(dilations = var_1186_dilations_0, groups = var_1186_groups_0, pad = var_1186_pad_0, pad_type = var_1186_pad_type_0, strides = var_1186_strides_0, weight = model_model_layers_12_self_attn_q_proj_weight_palettized, x = var_1179)[name = string("op_1186")];
            tensor<int32, [4]> var_1187 = const()[name = string("op_1187"), val = tensor<int32, [4]>([1, 24, 1, 128])];
            tensor<fp16, [1, 24, 1, 128]> var_1188 = reshape(shape = var_1187, x = var_1186)[name = string("op_1188")];
            string var_1195_pad_type_0 = const()[name = string("op_1195_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> var_1195_strides_0 = const()[name = string("op_1195_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> var_1195_pad_0 = const()[name = string("op_1195_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> var_1195_dilations_0 = const()[name = string("op_1195_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 var_1195_groups_0 = const()[name = string("op_1195_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 1]> var_1195 = conv(dilations = var_1195_dilations_0, groups = var_1195_groups_0, pad = var_1195_pad_0, pad_type = var_1195_pad_type_0, strides = var_1195_strides_0, weight = model_model_layers_12_self_attn_k_proj_weight_palettized, x = var_1179)[name = string("op_1195")];
            tensor<int32, [4]> var_1196 = const()[name = string("op_1196"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<fp16, [1, 8, 1, 128]> var_1197 = reshape(shape = var_1196, x = var_1195)[name = string("op_1197")];
            string var_1204_pad_type_0 = const()[name = string("op_1204_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> var_1204_strides_0 = const()[name = string("op_1204_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> var_1204_pad_0 = const()[name = string("op_1204_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> var_1204_dilations_0 = const()[name = string("op_1204_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 var_1204_groups_0 = const()[name = string("op_1204_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 1]> var_1204 = conv(dilations = var_1204_dilations_0, groups = var_1204_groups_0, pad = var_1204_pad_0, pad_type = var_1204_pad_type_0, strides = var_1204_strides_0, weight = model_model_layers_12_self_attn_v_proj_weight_palettized, x = var_1179)[name = string("op_1204")];
            tensor<int32, [4]> var_1205 = const()[name = string("op_1205"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<fp16, [1, 8, 1, 128]> var_1206 = reshape(shape = var_1205, x = var_1204)[name = string("op_1206")];
            tensor<int32, [4]> x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor<int32, [4]>([1, 24, 1, 64])];
            tensor<bool, [4]> x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 24, 1, 64]> x1_21 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = var_1188)[name = string("x1_21")];
            tensor<int32, [4]> x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor<int32, [4]>([1, 24, 1, 128])];
            tensor<bool, [4]> x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 24, 1, 64]> x2_21 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = var_1188)[name = string("x2_21")];
            tensor<fp16, [1, 24, 1, 64]> var_1220_cast_fp16 = mul(x = x1_21, y = cos_3_cast_fp16)[name = string("op_1220_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_1221_cast_fp16 = mul(x = x2_21, y = sin_3_cast_fp16)[name = string("op_1221_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_1222_cast_fp16 = sub(x = var_1220_cast_fp16, y = var_1221_cast_fp16)[name = string("op_1222_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_1223_cast_fp16 = mul(x = x2_21, y = cos_3_cast_fp16)[name = string("op_1223_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_1224_cast_fp16 = mul(x = x1_21, y = sin_3_cast_fp16)[name = string("op_1224_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_1225_cast_fp16 = add(x = var_1223_cast_fp16, y = var_1224_cast_fp16)[name = string("op_1225_cast_fp16")];
            bool rotated_21_interleave_0 = const()[name = string("rotated_21_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 24, 1, 128]> rotated_21_cast_fp16 = concat(axis = var_51, interleave = rotated_21_interleave_0, values = (var_1222_cast_fp16, var_1225_cast_fp16))[name = string("rotated_21_cast_fp16")];
            tensor<int32, [4]> x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor<int32, [4]>([1, 8, 1, 64])];
            tensor<bool, [4]> x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 8, 1, 64]> x1_23 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = var_1197)[name = string("x1_23")];
            tensor<int32, [4]> x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<bool, [4]> x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 8, 1, 64]> x2_23 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = var_1197)[name = string("x2_23")];
            tensor<fp16, [1, 8, 1, 64]> var_1241_cast_fp16 = mul(x = x1_23, y = cos_3_cast_fp16)[name = string("op_1241_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_1242_cast_fp16 = mul(x = x2_23, y = sin_3_cast_fp16)[name = string("op_1242_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_1243_cast_fp16 = sub(x = var_1241_cast_fp16, y = var_1242_cast_fp16)[name = string("op_1243_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_1244_cast_fp16 = mul(x = x2_23, y = cos_3_cast_fp16)[name = string("op_1244_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_1245_cast_fp16 = mul(x = x1_23, y = sin_3_cast_fp16)[name = string("op_1245_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_1246_cast_fp16 = add(x = var_1244_cast_fp16, y = var_1245_cast_fp16)[name = string("op_1246_cast_fp16")];
            bool rotated_23_interleave_0 = const()[name = string("rotated_23_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 8, 1, 128]> rotated_23_cast_fp16 = concat(axis = var_51, interleave = rotated_23_interleave_0, values = (var_1243_cast_fp16, var_1246_cast_fp16))[name = string("rotated_23_cast_fp16")];
            tensor<int32, [1]> expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor<int32, [1]>([12])];
            tensor<int32, [1]> expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor<int32, [1]>([13])];
            int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)];
            bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (expand_dims_60, expand_dims_61, current_pos, expand_dims_63))[name = string("concat_42")];
            tensor<int32, [1]> concat_43_values1_0 = const()[name = string("concat_43_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)];
            bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_64, concat_43_values1_0, var_326, concat_43_values3_0))[name = string("concat_43")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_11_stride_0, update = rotated_23_cast_fp16, x = coreml_update_state_23)[name = string("model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_10_write_state")];
            tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_24 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_10")];
            tensor<int32, [1]> expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor<int32, [1]>([40])];
            tensor<int32, [1]> expand_dims_67 = const()[name = string("expand_dims_67"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_69 = const()[name = string("expand_dims_69"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_70 = const()[name = string("expand_dims_70"), val = tensor<int32, [1]>([41])];
            int32 concat_46_axis_0 = const()[name = string("concat_46_axis_0"), val = int32(0)];
            bool concat_46_interleave_0 = const()[name = string("concat_46_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_46 = concat(axis = concat_46_axis_0, interleave = concat_46_interleave_0, values = (expand_dims_66, expand_dims_67, current_pos, expand_dims_69))[name = string("concat_46")];
            tensor<int32, [1]> concat_47_values1_0 = const()[name = string("concat_47_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_47_values3_0 = const()[name = string("concat_47_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_47_axis_0 = const()[name = string("concat_47_axis_0"), val = int32(0)];
            bool concat_47_interleave_0 = const()[name = string("concat_47_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_47 = concat(axis = concat_47_axis_0, interleave = concat_47_interleave_0, values = (expand_dims_70, concat_47_values1_0, var_326, concat_47_values3_0))[name = string("concat_47")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_46, begin_mask = model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0, end = concat_47, end_mask = model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_12_stride_0, update = var_1206, x = coreml_update_state_24)[name = string("model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_11_write_state")];
            tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_25 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_11")];
            tensor<int32, [4]> var_1266_begin_0 = const()[name = string("op_1266_begin_0"), val = tensor<int32, [4]>([12, 0, 0, 0])];
            tensor<int32, [4]> var_1266_end_0 = const()[name = string("op_1266_end_0"), val = tensor<int32, [4]>([13, 8, 1024, 128])];
            tensor<bool, [4]> var_1266_end_mask_0 = const()[name = string("op_1266_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_1266_cast_fp16 = slice_by_index(begin = var_1266_begin_0, end = var_1266_end_0, end_mask = var_1266_end_mask_0, x = coreml_update_state_25)[name = string("op_1266_cast_fp16")];
            tensor<int32, [1]> K_layer_cache_11_axes_0 = const()[name = string("K_layer_cache_11_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> K_layer_cache_11_cast_fp16 = squeeze(axes = K_layer_cache_11_axes_0, x = var_1266_cast_fp16)[name = string("K_layer_cache_11_cast_fp16")];
            tensor<int32, [4]> var_1268_begin_0 = const()[name = string("op_1268_begin_0"), val = tensor<int32, [4]>([40, 0, 0, 0])];
            tensor<int32, [4]> var_1268_end_0 = const()[name = string("op_1268_end_0"), val = tensor<int32, [4]>([41, 8, 1024, 128])];
            tensor<bool, [4]> var_1268_end_mask_0 = const()[name = string("op_1268_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_1268_cast_fp16 = slice_by_index(begin = var_1268_begin_0, end = var_1268_end_0, end_mask = var_1268_end_mask_0, x = coreml_update_state_25)[name = string("op_1268_cast_fp16")];
            tensor<int32, [1]> V_layer_cache_11_axes_0 = const()[name = string("V_layer_cache_11_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> V_layer_cache_11_cast_fp16 = squeeze(axes = V_layer_cache_11_axes_0, x = var_1268_cast_fp16)[name = string("V_layer_cache_11_cast_fp16")];
            tensor<int32, [1]> x_151_axes_0 = const()[name = string("x_151_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_151_cast_fp16 = expand_dims(axes = x_151_axes_0, x = K_layer_cache_11_cast_fp16)[name = string("x_151_cast_fp16")];
            tensor<int32, [4]> var_1277 = const()[name = string("op_1277"), val = tensor<int32, [4]>([1, 3, 1, 1])];
            tensor<fp16, [8, 3, 1024, 128]> x_153_cast_fp16 = tile(reps = var_1277, x = x_151_cast_fp16)[name = string("x_153_cast_fp16")];
            tensor<int32, [4]> var_1281 = const()[name = string("op_1281"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
            tensor<fp16, [1, 24, 1024, 128]> key_states_23_cast_fp16 = reshape(shape = var_1281, x = x_153_cast_fp16)[name = string("key_states_23_cast_fp16")];
            tensor<int32, [1]> x_157_axes_0 = const()[name = string("x_157_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_157_cast_fp16 = expand_dims(axes = x_157_axes_0, x = V_layer_cache_11_cast_fp16)[name = string("x_157_cast_fp16")];
            tensor<int32, [4]> var_1284 = const()[name = string("op_1284"), val = tensor<int32, [4]>([1, 3, 1, 1])];
            tensor<fp16, [8, 3, 1024, 128]> x_159_cast_fp16 = tile(reps = var_1284, x = x_157_cast_fp16)[name = string("x_159_cast_fp16")];
            tensor<int32, [4]> var_1288 = const()[name = string("op_1288"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
            tensor<fp16, [1, 24, 1024, 128]> value_states_23_cast_fp16 = reshape(shape = var_1288, x = x_159_cast_fp16)[name = string("value_states_23_cast_fp16")];
            bool var_1291_transpose_x_1 = const()[name = string("op_1291_transpose_x_1"), val = bool(false)];
            bool var_1291_transpose_y_1 = const()[name = string("op_1291_transpose_y_1"), val = bool(true)];
            tensor<fp16, [1, 24, 1, 1024]> var_1291_cast_fp16 = matmul(transpose_x = var_1291_transpose_x_1, transpose_y = var_1291_transpose_y_1, x = rotated_21_cast_fp16, y = key_states_23_cast_fp16)[name = string("op_1291_cast_fp16")];
            fp16 var_1292_to_fp16 = const()[name = string("op_1292_to_fp16"), val = fp16(0x1.6ap-4)];
            tensor<fp16, [1, 24, 1, 1024]> attn_weights_21_cast_fp16 = mul(x = var_1291_cast_fp16, y = var_1292_to_fp16)[name = string("attn_weights_21_cast_fp16")];
            tensor<fp16, [1, 24, 1, 1024]> x_161_cast_fp16 = add(x = attn_weights_21_cast_fp16, y = causal_mask)[name = string("x_161_cast_fp16")];
            tensor<int32, [1]> reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor<int32, [1]>([-1])];
            bool reduce_max_5_keep_dims_0 = const()[name = string("reduce_max_5_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 24, 1, 1]> reduce_max_5_cast_fp16 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = x_161_cast_fp16)[name = string("reduce_max_5_cast_fp16")];
            tensor<fp16, [1, 24, 1, 1024]> x_163_cast_fp16 = sub(x = x_161_cast_fp16, y = reduce_max_5_cast_fp16)[name = string("x_163_cast_fp16")];
            tensor<fp16, [1, 24, 1, 1024]> exp_x_11_cast_fp16 = exp(x = x_163_cast_fp16)[name = string("exp_x_11_cast_fp16")];
            tensor<int32, [1]> var_1303_axes_0 = const()[name = string("op_1303_axes_0"), val = tensor<int32, [1]>([-1])];
            bool var_1303_keep_dims_0 = const()[name = string("op_1303_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 24, 1, 1]> var_1303_cast_fp16 = reduce_sum(axes = var_1303_axes_0, keep_dims = var_1303_keep_dims_0, x = exp_x_11_cast_fp16)[name = string("op_1303_cast_fp16")];
            tensor<fp16, [1, 24, 1, 1024]> attn_weights_23_cast_fp16 = real_div(x = exp_x_11_cast_fp16, y = var_1303_cast_fp16)[name = string("attn_weights_23_cast_fp16")];
            bool attn_output_31_transpose_x_0 = const()[name = string("attn_output_31_transpose_x_0"), val = bool(false)];
            bool attn_output_31_transpose_y_0 = const()[name = string("attn_output_31_transpose_y_0"), val = bool(false)];
            tensor<fp16, [1, 24, 1, 128]> attn_output_31_cast_fp16 = matmul(transpose_x = attn_output_31_transpose_x_0, transpose_y = attn_output_31_transpose_y_0, x = attn_weights_23_cast_fp16, y = value_states_23_cast_fp16)[name = string("attn_output_31_cast_fp16")];
            tensor<int32, [4]> var_1306_perm_0 = const()[name = string("op_1306_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_1308 = const()[name = string("op_1308"), val = tensor<int32, [3]>([1, 1, 3072])];
            tensor<fp16, [1, 1, 24, 128]> var_1306_cast_fp16 = transpose(perm = var_1306_perm_0, x = attn_output_31_cast_fp16)[name = string("transpose_6")];
            tensor<fp16, [1, 1, 3072]> input_75_cast_fp16 = reshape(shape = var_1308, x = var_1306_cast_fp16)[name = string("input_75_cast_fp16")];
            tensor<fp16, [3072, 3072]> model_model_layers_12_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(764951360))), lut = tensor<fp16, [384, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774388608))))[name = string("model_model_layers_12_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
            tensor<fp16, [1, 1, 3072]> linear_5_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_12_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("linear_5_cast_fp16")];
            tensor<fp16, [1, 1, 3072]> hidden_states_45_cast_fp16 = add(x = hidden_states_41_cast_fp16, y = linear_5_cast_fp16)[name = string("hidden_states_45_cast_fp16")];
            tensor<int32, [1]> mean_23_axes_0 = const()[name = string("mean_23_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_23_keep_dims_0 = const()[name = string("mean_23_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 1, 1]> mean_23_cast_fp16 = reduce_mean(axes = mean_23_axes_0, keep_dims = mean_23_keep_dims_0, x = hidden_states_45_cast_fp16)[name = string("mean_23_cast_fp16")];
            tensor<fp16, [1, 1, 3072]> input_77_cast_fp16 = sub(x = hidden_states_45_cast_fp16, y = mean_23_cast_fp16)[name = string("input_77_cast_fp16")];
            tensor<int32, [1]> var_1319_axes_0 = const()[name = string("op_1319_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [3072]> model_model_layers_12_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774585280)))];
            tensor<fp16, [1, 1, 3072]> var_1319_cast_fp16 = layer_norm(axes = var_1319_axes_0, epsilon = var_46_to_fp16, gamma = model_model_layers_12_post_attention_layernorm_weight_to_fp16, x = input_77_cast_fp16)[name = string("op_1319_cast_fp16")];
            tensor<int32, [3]> var_1326 = const()[name = string("op_1326"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> input_79_axes_0 = const()[name = string("input_79_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 1]> var_1327 = transpose(perm = var_1326, x = var_1319_cast_fp16)[name = string("transpose_5")];
            tensor<fp16, [1, 3072, 1, 1]> input_79 = expand_dims(axes = input_79_axes_0, x = var_1327)[name = string("input_79")];
            string input_81_pad_type_0 = const()[name = string("input_81_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> input_81_strides_0 = const()[name = string("input_81_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> input_81_pad_0 = const()[name = string("input_81_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> input_81_dilations_0 = const()[name = string("input_81_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 input_81_groups_0 = const()[name = string("input_81_groups_0"), val = int32(1)];
            tensor<fp16, [1, 8192, 1, 1]> input_81 = conv(dilations = input_81_dilations_0, groups = input_81_groups_0, pad = input_81_pad_0, pad_type = input_81_pad_type_0, strides = input_81_strides_0, weight = model_model_layers_12_mlp_gate_proj_weight_palettized, x = input_79)[name = string("input_81")];
            string up_states_11_pad_type_0 = const()[name = string("up_states_11_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> up_states_11_strides_0 = const()[name = string("up_states_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> up_states_11_pad_0 = const()[name = string("up_states_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> up_states_11_dilations_0 = const()[name = string("up_states_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 up_states_11_groups_0 = const()[name = string("up_states_11_groups_0"), val = int32(1)];
            tensor<fp16, [1, 8192, 1, 1]> up_states_11 = conv(dilations = up_states_11_dilations_0, groups = up_states_11_groups_0, pad = up_states_11_pad_0, pad_type = up_states_11_pad_type_0, strides = up_states_11_strides_0, weight = model_model_layers_12_mlp_up_proj_weight_palettized, x = input_79)[name = string("up_states_11")];
            tensor<fp16, [1, 8192, 1, 1]> gate_states_11 = silu(x = input_81)[name = string("gate_states_11")];
            tensor<fp16, [1, 8192, 1, 1]> input_83 = mul(x = gate_states_11, y = up_states_11)[name = string("input_83")];
            string hidden_states_47_pad_type_0 = const()[name = string("hidden_states_47_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> hidden_states_47_strides_0 = const()[name = string("hidden_states_47_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> hidden_states_47_pad_0 = const()[name = string("hidden_states_47_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> hidden_states_47_dilations_0 = const()[name = string("hidden_states_47_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 hidden_states_47_groups_0 = const()[name = string("hidden_states_47_groups_0"), val = int32(1)];
            tensor<fp16, [1, 3072, 1, 1]> hidden_states_47 = conv(dilations = hidden_states_47_dilations_0, groups = hidden_states_47_groups_0, pad = hidden_states_47_pad_0, pad_type = hidden_states_47_pad_type_0, strides = hidden_states_47_strides_0, weight = model_model_layers_12_mlp_down_proj_weight_palettized, x = input_83)[name = string("hidden_states_47")];
            tensor<int32, [1]> var_1349_axes_0 = const()[name = string("op_1349_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 1]> var_1349 = squeeze(axes = var_1349_axes_0, x = hidden_states_47)[name = string("op_1349")];
            tensor<int32, [3]> var_1350 = const()[name = string("op_1350"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 3072]> var_1351 = transpose(perm = var_1350, x = var_1349)[name = string("transpose_4")];
            tensor<fp16, [1, 1, 3072]> hidden_states_49_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = var_1351)[name = string("hidden_states_49_cast_fp16")];
            tensor<int32, [1]> mean_25_axes_0 = const()[name = string("mean_25_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_25_keep_dims_0 = const()[name = string("mean_25_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 1, 1]> mean_25_cast_fp16 = reduce_mean(axes = mean_25_axes_0, keep_dims = mean_25_keep_dims_0, x = hidden_states_49_cast_fp16)[name = string("mean_25_cast_fp16")];
            tensor<fp16, [1, 1, 3072]> input_85_cast_fp16 = sub(x = hidden_states_49_cast_fp16, y = mean_25_cast_fp16)[name = string("input_85_cast_fp16")];
            tensor<int32, [1]> var_1359_axes_0 = const()[name = string("op_1359_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [3072]> model_model_layers_13_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_input_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774591488)))];
            tensor<fp16, [1, 1, 3072]> var_1359_cast_fp16 = layer_norm(axes = var_1359_axes_0, epsilon = var_46_to_fp16, gamma = model_model_layers_13_input_layernorm_weight_to_fp16, x = input_85_cast_fp16)[name = string("op_1359_cast_fp16")];
            tensor<int32, [3]> var_1362 = const()[name = string("op_1362"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> var_1364_axes_0 = const()[name = string("op_1364_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 1]> var_1363 = transpose(perm = var_1362, x = var_1359_cast_fp16)[name = string("transpose_3")];
            tensor<fp16, [1, 3072, 1, 1]> var_1364 = expand_dims(axes = var_1364_axes_0, x = var_1363)[name = string("op_1364")];
            string var_1371_pad_type_0 = const()[name = string("op_1371_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> var_1371_strides_0 = const()[name = string("op_1371_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> var_1371_pad_0 = const()[name = string("op_1371_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> var_1371_dilations_0 = const()[name = string("op_1371_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 var_1371_groups_0 = const()[name = string("op_1371_groups_0"), val = int32(1)];
            tensor<fp16, [1, 3072, 1, 1]> var_1371 = conv(dilations = var_1371_dilations_0, groups = var_1371_groups_0, pad = var_1371_pad_0, pad_type = var_1371_pad_type_0, strides = var_1371_strides_0, weight = model_model_layers_13_self_attn_q_proj_weight_palettized, x = var_1364)[name = string("op_1371")];
            tensor<int32, [4]> var_1372 = const()[name = string("op_1372"), val = tensor<int32, [4]>([1, 24, 1, 128])];
            tensor<fp16, [1, 24, 1, 128]> var_1373 = reshape(shape = var_1372, x = var_1371)[name = string("op_1373")];
            string var_1380_pad_type_0 = const()[name = string("op_1380_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> var_1380_strides_0 = const()[name = string("op_1380_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> var_1380_pad_0 = const()[name = string("op_1380_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> var_1380_dilations_0 = const()[name = string("op_1380_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 var_1380_groups_0 = const()[name = string("op_1380_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 1]> var_1380 = conv(dilations = var_1380_dilations_0, groups = var_1380_groups_0, pad = var_1380_pad_0, pad_type = var_1380_pad_type_0, strides = var_1380_strides_0, weight = model_model_layers_13_self_attn_k_proj_weight_palettized, x = var_1364)[name = string("op_1380")];
            tensor<int32, [4]> var_1381 = const()[name = string("op_1381"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<fp16, [1, 8, 1, 128]> var_1382 = reshape(shape = var_1381, x = var_1380)[name = string("op_1382")];
            string var_1389_pad_type_0 = const()[name = string("op_1389_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> var_1389_strides_0 = const()[name = string("op_1389_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> var_1389_pad_0 = const()[name = string("op_1389_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> var_1389_dilations_0 = const()[name = string("op_1389_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 var_1389_groups_0 = const()[name = string("op_1389_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 1]> var_1389 = conv(dilations = var_1389_dilations_0, groups = var_1389_groups_0, pad = var_1389_pad_0, pad_type = var_1389_pad_type_0, strides = var_1389_strides_0, weight = model_model_layers_13_self_attn_v_proj_weight_palettized, x = var_1364)[name = string("op_1389")];
            tensor<int32, [4]> var_1390 = const()[name = string("op_1390"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<fp16, [1, 8, 1, 128]> var_1391 = reshape(shape = var_1390, x = var_1389)[name = string("op_1391")];
            tensor<int32, [4]> x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor<int32, [4]>([1, 24, 1, 64])];
            tensor<bool, [4]> x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 24, 1, 64]> x1_25 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = var_1373)[name = string("x1_25")];
            tensor<int32, [4]> x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor<int32, [4]>([1, 24, 1, 128])];
            tensor<bool, [4]> x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 24, 1, 64]> x2_25 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = var_1373)[name = string("x2_25")];
            tensor<fp16, [1, 24, 1, 64]> var_1405_cast_fp16 = mul(x = x1_25, y = cos_3_cast_fp16)[name = string("op_1405_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_1406_cast_fp16 = mul(x = x2_25, y = sin_3_cast_fp16)[name = string("op_1406_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_1407_cast_fp16 = sub(x = var_1405_cast_fp16, y = var_1406_cast_fp16)[name = string("op_1407_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_1408_cast_fp16 = mul(x = x2_25, y = cos_3_cast_fp16)[name = string("op_1408_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_1409_cast_fp16 = mul(x = x1_25, y = sin_3_cast_fp16)[name = string("op_1409_cast_fp16")];
            tensor<fp16, [1, 24, 1, 64]> var_1410_cast_fp16 = add(x = var_1408_cast_fp16, y = var_1409_cast_fp16)[name = string("op_1410_cast_fp16")];
            bool rotated_25_interleave_0 = const()[name = string("rotated_25_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 24, 1, 128]> rotated_25_cast_fp16 = concat(axis = var_51, interleave = rotated_25_interleave_0, values = (var_1407_cast_fp16, var_1410_cast_fp16))[name = string("rotated_25_cast_fp16")];
            tensor<int32, [4]> x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_end_0 = const()[name = string("x1_end_0"), val = tensor<int32, [4]>([1, 8, 1, 64])];
            tensor<bool, [4]> x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 8, 1, 64]> x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = var_1382)[name = string("x1")];
            tensor<int32, [4]> x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_end_0 = const()[name = string("x2_end_0"), val = tensor<int32, [4]>([1, 8, 1, 128])];
            tensor<bool, [4]> x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 8, 1, 64]> x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = var_1382)[name = string("x2")];
            tensor<fp16, [1, 8, 1, 64]> var_1426_cast_fp16 = mul(x = x1, y = cos_3_cast_fp16)[name = string("op_1426_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_1427_cast_fp16 = mul(x = x2, y = sin_3_cast_fp16)[name = string("op_1427_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_1428_cast_fp16 = sub(x = var_1426_cast_fp16, y = var_1427_cast_fp16)[name = string("op_1428_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_1429_cast_fp16 = mul(x = x2, y = cos_3_cast_fp16)[name = string("op_1429_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_1430_cast_fp16 = mul(x = x1, y = sin_3_cast_fp16)[name = string("op_1430_cast_fp16")];
            tensor<fp16, [1, 8, 1, 64]> var_1431_cast_fp16 = add(x = var_1429_cast_fp16, y = var_1430_cast_fp16)[name = string("op_1431_cast_fp16")];
            bool rotated_interleave_0 = const()[name = string("rotated_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 8, 1, 128]> rotated_cast_fp16 = concat(axis = var_51, interleave = rotated_interleave_0, values = (var_1428_cast_fp16, var_1431_cast_fp16))[name = string("rotated_cast_fp16")];
            tensor<int32, [1]> expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor<int32, [1]>([13])];
            tensor<int32, [1]> expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_76 = const()[name = string("expand_dims_76"), val = tensor<int32, [1]>([14])];
            int32 concat_50_axis_0 = const()[name = string("concat_50_axis_0"), val = int32(0)];
            bool concat_50_interleave_0 = const()[name = string("concat_50_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_50 = concat(axis = concat_50_axis_0, interleave = concat_50_interleave_0, values = (expand_dims_72, expand_dims_73, current_pos, expand_dims_75))[name = string("concat_50")];
            tensor<int32, [1]> concat_51_values1_0 = const()[name = string("concat_51_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_51_values3_0 = const()[name = string("concat_51_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)];
            bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (expand_dims_76, concat_51_values1_0, var_326, concat_51_values3_0))[name = string("concat_51")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_50, begin_mask = model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0, end = concat_51, end_mask = model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_13_stride_0, update = rotated_cast_fp16, x = coreml_update_state_25)[name = string("model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_12_write_state")];
            tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_26 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_12")];
            tensor<int32, [1]> expand_dims_78 = const()[name = string("expand_dims_78"), val = tensor<int32, [1]>([41])];
            tensor<int32, [1]> expand_dims_79 = const()[name = string("expand_dims_79"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor<int32, [1]>([42])];
            int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)];
            bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (expand_dims_78, expand_dims_79, current_pos, expand_dims_81))[name = string("concat_54")];
            tensor<int32, [1]> concat_55_values1_0 = const()[name = string("concat_55_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_55_values3_0 = const()[name = string("concat_55_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_55_axis_0 = const()[name = string("concat_55_axis_0"), val = int32(0)];
            bool concat_55_interleave_0 = const()[name = string("concat_55_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_55 = concat(axis = concat_55_axis_0, interleave = concat_55_interleave_0, values = (expand_dims_82, concat_55_values1_0, var_326, concat_55_values3_0))[name = string("concat_55")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_54, begin_mask = model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0, end = concat_55, end_mask = model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_14_stride_0, update = var_1391, x = coreml_update_state_26)[name = string("model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_13_write_state")];
            tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_27 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_13")];
            tensor<int32, [4]> var_1451_begin_0 = const()[name = string("op_1451_begin_0"), val = tensor<int32, [4]>([13, 0, 0, 0])];
            tensor<int32, [4]> var_1451_end_0 = const()[name = string("op_1451_end_0"), val = tensor<int32, [4]>([14, 8, 1024, 128])];
            tensor<bool, [4]> var_1451_end_mask_0 = const()[name = string("op_1451_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_1451_cast_fp16 = slice_by_index(begin = var_1451_begin_0, end = var_1451_end_0, end_mask = var_1451_end_mask_0, x = coreml_update_state_27)[name = string("op_1451_cast_fp16")];
            tensor<int32, [1]> K_layer_cache_axes_0 = const()[name = string("K_layer_cache_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> K_layer_cache_cast_fp16 = squeeze(axes = K_layer_cache_axes_0, x = var_1451_cast_fp16)[name = string("K_layer_cache_cast_fp16")];
            tensor<int32, [4]> var_1453_begin_0 = const()[name = string("op_1453_begin_0"), val = tensor<int32, [4]>([41, 0, 0, 0])];
            tensor<int32, [4]> var_1453_end_0 = const()[name = string("op_1453_end_0"), val = tensor<int32, [4]>([42, 8, 1024, 128])];
            tensor<bool, [4]> var_1453_end_mask_0 = const()[name = string("op_1453_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_1453_cast_fp16 = slice_by_index(begin = var_1453_begin_0, end = var_1453_end_0, end_mask = var_1453_end_mask_0, x = coreml_update_state_27)[name = string("op_1453_cast_fp16")];
            tensor<int32, [1]> V_layer_cache_axes_0 = const()[name = string("V_layer_cache_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> V_layer_cache_cast_fp16 = squeeze(axes = V_layer_cache_axes_0, x = var_1453_cast_fp16)[name = string("V_layer_cache_cast_fp16")];
            tensor<int32, [1]> x_179_axes_0 = const()[name = string("x_179_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_179_cast_fp16 = expand_dims(axes = x_179_axes_0, x = K_layer_cache_cast_fp16)[name = string("x_179_cast_fp16")];
            tensor<int32, [4]> var_1462 = const()[name = string("op_1462"), val = tensor<int32, [4]>([1, 3, 1, 1])];
            tensor<fp16, [8, 3, 1024, 128]> x_181_cast_fp16 = tile(reps = var_1462, x = x_179_cast_fp16)[name = string("x_181_cast_fp16")];
            tensor<int32, [4]> var_1466 = const()[name = string("op_1466"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
            tensor<fp16, [1, 24, 1024, 128]> key_states_cast_fp16 = reshape(shape = var_1466, x = x_181_cast_fp16)[name = string("key_states_cast_fp16")];
            tensor<int32, [1]> x_185_axes_0 = const()[name = string("x_185_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_185_cast_fp16 = expand_dims(axes = x_185_axes_0, x = V_layer_cache_cast_fp16)[name = string("x_185_cast_fp16")];
            tensor<int32, [4]> var_1469 = const()[name = string("op_1469"), val = tensor<int32, [4]>([1, 3, 1, 1])];
            tensor<fp16, [8, 3, 1024, 128]> x_187_cast_fp16 = tile(reps = var_1469, x = x_185_cast_fp16)[name = string("x_187_cast_fp16")];
            tensor<int32, [4]> var_1473 = const()[name = string("op_1473"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
            tensor<fp16, [1, 24, 1024, 128]> value_states_cast_fp16 = reshape(shape = var_1473, x = x_187_cast_fp16)[name = string("value_states_cast_fp16")];
            bool var_1476_transpose_x_1 = const()[name = string("op_1476_transpose_x_1"), val = bool(false)];
            bool var_1476_transpose_y_1 = const()[name = string("op_1476_transpose_y_1"), val = bool(true)];
            tensor<fp16, [1, 24, 1, 1024]> var_1476_cast_fp16 = matmul(transpose_x = var_1476_transpose_x_1, transpose_y = var_1476_transpose_y_1, x = rotated_25_cast_fp16, y = key_states_cast_fp16)[name = string("op_1476_cast_fp16")];
            fp16 var_1477_to_fp16 = const()[name = string("op_1477_to_fp16"), val = fp16(0x1.6ap-4)];
            tensor<fp16, [1, 24, 1, 1024]> attn_weights_25_cast_fp16 = mul(x = var_1476_cast_fp16, y = var_1477_to_fp16)[name = string("attn_weights_25_cast_fp16")];
            tensor<fp16, [1, 24, 1, 1024]> x_189_cast_fp16 = add(x = attn_weights_25_cast_fp16, y = causal_mask)[name = string("x_189_cast_fp16")];
            tensor<int32, [1]> reduce_max_6_axes_0 = const()[name = string("reduce_max_6_axes_0"), val = tensor<int32, [1]>([-1])];
            bool reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 24, 1, 1]> reduce_max_6_cast_fp16 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = x_189_cast_fp16)[name = string("reduce_max_6_cast_fp16")];
            tensor<fp16, [1, 24, 1, 1024]> x_191_cast_fp16 = sub(x = x_189_cast_fp16, y = reduce_max_6_cast_fp16)[name = string("x_191_cast_fp16")];
            tensor<fp16, [1, 24, 1, 1024]> exp_x_cast_fp16 = exp(x = x_191_cast_fp16)[name = string("exp_x_cast_fp16")];
            tensor<int32, [1]> var_1488_axes_0 = const()[name = string("op_1488_axes_0"), val = tensor<int32, [1]>([-1])];
            bool var_1488_keep_dims_0 = const()[name = string("op_1488_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 24, 1, 1]> var_1488_cast_fp16 = reduce_sum(axes = var_1488_axes_0, keep_dims = var_1488_keep_dims_0, x = exp_x_cast_fp16)[name = string("op_1488_cast_fp16")];
            tensor<fp16, [1, 24, 1, 1024]> attn_weights_cast_fp16 = real_div(x = exp_x_cast_fp16, y = var_1488_cast_fp16)[name = string("attn_weights_cast_fp16")];
            bool attn_output_37_transpose_x_0 = const()[name = string("attn_output_37_transpose_x_0"), val = bool(false)];
            bool attn_output_37_transpose_y_0 = const()[name = string("attn_output_37_transpose_y_0"), val = bool(false)];
            tensor<fp16, [1, 24, 1, 128]> attn_output_37_cast_fp16 = matmul(transpose_x = attn_output_37_transpose_x_0, transpose_y = attn_output_37_transpose_y_0, x = attn_weights_cast_fp16, y = value_states_cast_fp16)[name = string("attn_output_37_cast_fp16")];
            tensor<int32, [4]> var_1491_perm_0 = const()[name = string("op_1491_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_1493 = const()[name = string("op_1493"), val = tensor<int32, [3]>([1, 1, 3072])];
            tensor<fp16, [1, 1, 24, 128]> var_1491_cast_fp16 = transpose(perm = var_1491_perm_0, x = attn_output_37_cast_fp16)[name = string("transpose_2")];
            tensor<fp16, [1, 1, 3072]> input_89_cast_fp16 = reshape(shape = var_1493, x = var_1491_cast_fp16)[name = string("input_89_cast_fp16")];
            tensor<fp16, [3072, 3072]> model_model_layers_13_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774597696))), lut = tensor<fp16, [384, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(784034944))))[name = string("model_model_layers_13_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
            tensor<fp16, [1, 1, 3072]> linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_13_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_89_cast_fp16)[name = string("linear_6_cast_fp16")];
            tensor<fp16, [1, 1, 3072]> hidden_states_53_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = linear_6_cast_fp16)[name = string("hidden_states_53_cast_fp16")];
            tensor<int32, [1]> mean_axes_0 = const()[name = string("mean_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_keep_dims_0 = const()[name = string("mean_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 1, 1]> mean_cast_fp16 = reduce_mean(axes = mean_axes_0, keep_dims = mean_keep_dims_0, x = hidden_states_53_cast_fp16)[name = string("mean_cast_fp16")];
            tensor<fp16, [1, 1, 3072]> input_91_cast_fp16 = sub(x = hidden_states_53_cast_fp16, y = mean_cast_fp16)[name = string("input_91_cast_fp16")];
            tensor<int32, [1]> var_1504_axes_0 = const()[name = string("op_1504_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [3072]> model_model_layers_13_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(784231616)))];
            tensor<fp16, [1, 1, 3072]> var_1504_cast_fp16 = layer_norm(axes = var_1504_axes_0, epsilon = var_46_to_fp16, gamma = model_model_layers_13_post_attention_layernorm_weight_to_fp16, x = input_91_cast_fp16)[name = string("op_1504_cast_fp16")];
            tensor<int32, [3]> var_1511 = const()[name = string("op_1511"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> input_93_axes_0 = const()[name = string("input_93_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 1]> var_1512 = transpose(perm = var_1511, x = var_1504_cast_fp16)[name = string("transpose_1")];
            tensor<fp16, [1, 3072, 1, 1]> input_93 = expand_dims(axes = input_93_axes_0, x = var_1512)[name = string("input_93")];
            string input_95_pad_type_0 = const()[name = string("input_95_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> input_95_strides_0 = const()[name = string("input_95_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> input_95_pad_0 = const()[name = string("input_95_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> input_95_dilations_0 = const()[name = string("input_95_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 input_95_groups_0 = const()[name = string("input_95_groups_0"), val = int32(1)];
            tensor<fp16, [1, 8192, 1, 1]> input_95 = conv(dilations = input_95_dilations_0, groups = input_95_groups_0, pad = input_95_pad_0, pad_type = input_95_pad_type_0, strides = input_95_strides_0, weight = model_model_layers_13_mlp_gate_proj_weight_palettized, x = input_93)[name = string("input_95")];
            string up_states_pad_type_0 = const()[name = string("up_states_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> up_states_strides_0 = const()[name = string("up_states_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> up_states_pad_0 = const()[name = string("up_states_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> up_states_dilations_0 = const()[name = string("up_states_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 up_states_groups_0 = const()[name = string("up_states_groups_0"), val = int32(1)];
            tensor<fp16, [1, 8192, 1, 1]> up_states = conv(dilations = up_states_dilations_0, groups = up_states_groups_0, pad = up_states_pad_0, pad_type = up_states_pad_type_0, strides = up_states_strides_0, weight = model_model_layers_13_mlp_up_proj_weight_palettized, x = input_93)[name = string("up_states")];
            tensor<fp16, [1, 8192, 1, 1]> gate_states = silu(x = input_95)[name = string("gate_states")];
            tensor<fp16, [1, 8192, 1, 1]> input = mul(x = gate_states, y = up_states)[name = string("input")];
            string hidden_states_pad_type_0 = const()[name = string("hidden_states_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> hidden_states_strides_0 = const()[name = string("hidden_states_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> hidden_states_pad_0 = const()[name = string("hidden_states_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> hidden_states_dilations_0 = const()[name = string("hidden_states_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 hidden_states_groups_0 = const()[name = string("hidden_states_groups_0"), val = int32(1)];
            tensor<fp16, [1, 3072, 1, 1]> hidden_states_1 = conv(dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = model_model_layers_13_mlp_down_proj_weight_palettized, x = input)[name = string("hidden_states")];
            tensor<int32, [1]> var_1534_axes_0 = const()[name = string("op_1534_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 1]> var_1534 = squeeze(axes = var_1534_axes_0, x = hidden_states_1)[name = string("op_1534")];
            tensor<int32, [3]> var_1535 = const()[name = string("op_1535"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 1, 3072]> var_1536 = transpose(perm = var_1535, x = var_1534)[name = string("transpose_0")];
            tensor<fp16, [1, 1, 3072]> output_hidden_states = add(x = hidden_states_53_cast_fp16, y = var_1536)[name = string("op_1537_cast_fp16")];
            tensor<int32, [1]> position_ids_tmp = identity(x = position_ids)[name = string("position_ids_tmp")];
        } -> (output_hidden_states);
    func prefill<ios18>(tensor<fp16, [1, 1, 64, 1024]> causal_mask, tensor<int32, [1]> current_pos, tensor<fp16, [1, 64, 3072]> hidden_states, state<tensor<fp16, [56, 8, 1024, 128]>> model_model_kv_cache_0, tensor<int32, [64]> position_ids) {
            tensor<fp16, [3072, 3072, 1, 1]> model_model_layers_7_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), lut = tensor<fp16, [384, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9437312))))[name = string("model_model_layers_7_self_attn_q_proj_weight_palettized")];
            tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_7_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9633984))), lut = tensor<fp16, [128, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12779776))))[name = string("model_model_layers_7_self_attn_k_proj_weight_palettized")];
            tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_7_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12845376))), lut = tensor<fp16, [128, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15991168))))[name = string("model_model_layers_7_self_attn_v_proj_weight_palettized")];
            tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_7_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16056768))), lut = tensor<fp16, [1024, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41222656))))[name = string("model_model_layers_7_mlp_gate_proj_weight_palettized")];
            tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_7_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41747008))), lut = tensor<fp16, [1024, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66912896))))[name = string("model_model_layers_7_mlp_up_proj_weight_palettized")];
            tensor<fp16, [3072, 8192, 1, 1]> model_model_layers_7_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 8192, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67437248))), lut = tensor<fp16, [384, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92603136))))[name = string("model_model_layers_7_mlp_down_proj_weight_palettized")];
            tensor<fp16, [3072, 3072, 1, 1]> model_model_layers_8_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92799808))), lut = tensor<fp16, [384, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102237056))))[name = string("model_model_layers_8_self_attn_q_proj_weight_palettized")];
            tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_8_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102433728))), lut = tensor<fp16, [128, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105579520))))[name = string("model_model_layers_8_self_attn_k_proj_weight_palettized")];
            tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_8_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105645120))), lut = tensor<fp16, [128, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108790912))))[name = string("model_model_layers_8_self_attn_v_proj_weight_palettized")];
            tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_8_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108856512))), lut = tensor<fp16, [1024, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134022400))))[name = string("model_model_layers_8_mlp_gate_proj_weight_palettized")];
            tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_8_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134546752))), lut = tensor<fp16, [1024, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159712640))))[name = string("model_model_layers_8_mlp_up_proj_weight_palettized")];
            tensor<fp16, [3072, 8192, 1, 1]> model_model_layers_8_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 8192, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160236992))), lut = tensor<fp16, [384, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185402880))))[name = string("model_model_layers_8_mlp_down_proj_weight_palettized")];
            tensor<fp16, [3072, 3072, 1, 1]> model_model_layers_9_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185599552))), lut = tensor<fp16, [384, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195036800))))[name = string("model_model_layers_9_self_attn_q_proj_weight_palettized")];
            tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_9_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195233472))), lut = tensor<fp16, [128, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198379264))))[name = string("model_model_layers_9_self_attn_k_proj_weight_palettized")];
            tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_9_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198444864))), lut = tensor<fp16, [128, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201590656))))[name = string("model_model_layers_9_self_attn_v_proj_weight_palettized")];
            tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_9_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(201656256))), lut = tensor<fp16, [1024, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(226822144))))[name = string("model_model_layers_9_mlp_gate_proj_weight_palettized")];
            tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_9_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227346496))), lut = tensor<fp16, [1024, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252512384))))[name = string("model_model_layers_9_mlp_up_proj_weight_palettized")];
            tensor<fp16, [3072, 8192, 1, 1]> model_model_layers_9_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 8192, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253036736))), lut = tensor<fp16, [384, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(278202624))))[name = string("model_model_layers_9_mlp_down_proj_weight_palettized")];
            tensor<fp16, [3072, 3072, 1, 1]> model_model_layers_10_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(278399296))), lut = tensor<fp16, [384, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287836544))))[name = string("model_model_layers_10_self_attn_q_proj_weight_palettized")];
            tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_10_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(288033216))), lut = tensor<fp16, [128, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(291179008))))[name = string("model_model_layers_10_self_attn_k_proj_weight_palettized")];
            tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_10_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(291244608))), lut = tensor<fp16, [128, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294390400))))[name = string("model_model_layers_10_self_attn_v_proj_weight_palettized")];
            tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_10_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294456000))), lut = tensor<fp16, [1024, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319621888))))[name = string("model_model_layers_10_mlp_gate_proj_weight_palettized")];
            tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_10_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320146240))), lut = tensor<fp16, [1024, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345312128))))[name = string("model_model_layers_10_mlp_up_proj_weight_palettized")];
            tensor<fp16, [3072, 8192, 1, 1]> model_model_layers_10_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 8192, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345836480))), lut = tensor<fp16, [384, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371002368))))[name = string("model_model_layers_10_mlp_down_proj_weight_palettized")];
            tensor<fp16, [3072, 3072, 1, 1]> model_model_layers_11_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371199040))), lut = tensor<fp16, [384, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380636288))))[name = string("model_model_layers_11_self_attn_q_proj_weight_palettized")];
            tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_11_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380832960))), lut = tensor<fp16, [128, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383978752))))[name = string("model_model_layers_11_self_attn_k_proj_weight_palettized")];
            tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_11_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384044352))), lut = tensor<fp16, [128, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387190144))))[name = string("model_model_layers_11_self_attn_v_proj_weight_palettized")];
            tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_11_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387255744))), lut = tensor<fp16, [1024, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412421632))))[name = string("model_model_layers_11_mlp_gate_proj_weight_palettized")];
            tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_11_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412945984))), lut = tensor<fp16, [1024, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438111872))))[name = string("model_model_layers_11_mlp_up_proj_weight_palettized")];
            tensor<fp16, [3072, 8192, 1, 1]> model_model_layers_11_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 8192, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438636224))), lut = tensor<fp16, [384, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(463802112))))[name = string("model_model_layers_11_mlp_down_proj_weight_palettized")];
            tensor<fp16, [3072, 3072, 1, 1]> model_model_layers_12_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(463998784))), lut = tensor<fp16, [384, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473436032))))[name = string("model_model_layers_12_self_attn_q_proj_weight_palettized")];
            tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_12_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473632704))), lut = tensor<fp16, [128, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(476778496))))[name = string("model_model_layers_12_self_attn_k_proj_weight_palettized")];
            tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_12_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(476844096))), lut = tensor<fp16, [128, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479989888))))[name = string("model_model_layers_12_self_attn_v_proj_weight_palettized")];
            tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_12_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(480055488))), lut = tensor<fp16, [1024, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505221376))))[name = string("model_model_layers_12_mlp_gate_proj_weight_palettized")];
            tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_12_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505745728))), lut = tensor<fp16, [1024, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530911616))))[name = string("model_model_layers_12_mlp_up_proj_weight_palettized")];
            tensor<fp16, [3072, 8192, 1, 1]> model_model_layers_12_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 8192, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531435968))), lut = tensor<fp16, [384, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556601856))))[name = string("model_model_layers_12_mlp_down_proj_weight_palettized")];
            tensor<fp16, [3072, 3072, 1, 1]> model_model_layers_13_self_attn_q_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556798528))), lut = tensor<fp16, [384, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(566235776))))[name = string("model_model_layers_13_self_attn_q_proj_weight_palettized")];
            tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_13_self_attn_k_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(566432448))), lut = tensor<fp16, [128, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(569578240))))[name = string("model_model_layers_13_self_attn_k_proj_weight_palettized")];
            tensor<fp16, [1024, 3072, 1, 1]> model_model_layers_13_self_attn_v_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [1024, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(569643840))), lut = tensor<fp16, [128, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(572789632))))[name = string("model_model_layers_13_self_attn_v_proj_weight_palettized")];
            tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_13_mlp_gate_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(572855232))), lut = tensor<fp16, [1024, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(598021120))))[name = string("model_model_layers_13_mlp_gate_proj_weight_palettized")];
            tensor<fp16, [8192, 3072, 1, 1]> model_model_layers_13_mlp_up_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [8192, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(598545472))), lut = tensor<fp16, [1024, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(623711360))))[name = string("model_model_layers_13_mlp_up_proj_weight_palettized")];
            tensor<fp16, [3072, 8192, 1, 1]> model_model_layers_13_mlp_down_proj_weight_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 8192, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(624235712))), lut = tensor<fp16, [384, 1, 1, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(649401600))))[name = string("model_model_layers_13_mlp_down_proj_weight_palettized")];
            int32 var_46 = const()[name = string("op_46"), val = int32(-1)];
            int32 greater_equal_0_y_0 = const()[name = string("greater_equal_0_y_0"), val = int32(0)];
            tensor<bool, [64]> greater_equal_0 = greater_equal(x = position_ids, y = greater_equal_0_y_0)[name = string("greater_equal_0")];
            int32 slice_by_index_0 = const()[name = string("slice_by_index_0"), val = int32(131072)];
            tensor<int32, [64]> add_0 = add(x = position_ids, y = slice_by_index_0)[name = string("add_0")];
            tensor<int32, [64]> select_0 = select(a = position_ids, b = add_0, cond = greater_equal_0)[name = string("select_0")];
            int32 var_238_axis_0 = const()[name = string("op_238_axis_0"), val = int32(1)];
            int32 var_238_batch_dims_0 = const()[name = string("op_238_batch_dims_0"), val = int32(0)];
            bool var_238_validate_indices_0 = const()[name = string("op_238_validate_indices_0"), val = bool(false)];
            tensor<fp16, [1, 131072, 128]> var_57_to_fp16 = const()[name = string("op_57_to_fp16"), val = tensor<fp16, [1, 131072, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(683152768)))];
            tensor<fp16, [1, 64, 128]> var_238_cast_fp16 = gather(axis = var_238_axis_0, batch_dims = var_238_batch_dims_0, indices = select_0, validate_indices = var_238_validate_indices_0, x = var_57_to_fp16)[name = string("op_238_cast_fp16")];
            tensor<int32, [4]> var_239 = const()[name = string("op_239"), val = tensor<int32, [4]>([1, 64, 1, 128])];
            tensor<fp16, [1, 64, 1, 128]> cos_1_cast_fp16 = reshape(shape = var_239, x = var_238_cast_fp16)[name = string("cos_1_cast_fp16")];
            int32 var_243_axis_0 = const()[name = string("op_243_axis_0"), val = int32(1)];
            int32 var_243_batch_dims_0 = const()[name = string("op_243_batch_dims_0"), val = int32(0)];
            bool var_243_validate_indices_0 = const()[name = string("op_243_validate_indices_0"), val = bool(false)];
            tensor<fp16, [1, 131072, 128]> var_52_to_fp16 = const()[name = string("op_52_to_fp16"), val = tensor<fp16, [1, 131072, 128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(649598272)))];
            tensor<fp16, [1, 64, 128]> var_243_cast_fp16 = gather(axis = var_243_axis_0, batch_dims = var_243_batch_dims_0, indices = select_0, validate_indices = var_243_validate_indices_0, x = var_52_to_fp16)[name = string("op_243_cast_fp16")];
            tensor<int32, [4]> var_244 = const()[name = string("op_244"), val = tensor<int32, [4]>([1, 64, 1, 128])];
            tensor<fp16, [1, 64, 1, 128]> sin_1_cast_fp16 = reshape(shape = var_244, x = var_243_cast_fp16)[name = string("sin_1_cast_fp16")];
            tensor<int32, [1]> mean_1_axes_0 = const()[name = string("mean_1_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_1_keep_dims_0 = const()[name = string("mean_1_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 64, 1]> mean_1_cast_fp16 = reduce_mean(axes = mean_1_axes_0, keep_dims = mean_1_keep_dims_0, x = hidden_states)[name = string("mean_1_cast_fp16")];
            tensor<fp16, [1, 64, 3072]> input_1_cast_fp16 = sub(x = hidden_states, y = mean_1_cast_fp16)[name = string("input_1_cast_fp16")];
            tensor<int32, [1]> var_254_axes_0 = const()[name = string("op_254_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [3072]> model_model_layers_7_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_input_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716707264)))];
            fp16 var_48_to_fp16 = const()[name = string("op_48_to_fp16"), val = fp16(0x1.5p-17)];
            tensor<fp16, [1, 64, 3072]> var_254_cast_fp16 = layer_norm(axes = var_254_axes_0, epsilon = var_48_to_fp16, gamma = model_model_layers_7_input_layernorm_weight_to_fp16, x = input_1_cast_fp16)[name = string("op_254_cast_fp16")];
            tensor<int32, [3]> var_258 = const()[name = string("op_258"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> var_260_axes_0 = const()[name = string("op_260_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 64]> var_259 = transpose(perm = var_258, x = var_254_cast_fp16)[name = string("transpose_50")];
            tensor<fp16, [1, 3072, 1, 64]> var_260 = expand_dims(axes = var_260_axes_0, x = var_259)[name = string("op_260")];
            string query_states_1_pad_type_0 = const()[name = string("query_states_1_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> query_states_1_strides_0 = const()[name = string("query_states_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> query_states_1_pad_0 = const()[name = string("query_states_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> query_states_1_dilations_0 = const()[name = string("query_states_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 query_states_1_groups_0 = const()[name = string("query_states_1_groups_0"), val = int32(1)];
            tensor<fp16, [1, 3072, 1, 64]> query_states_1 = conv(dilations = query_states_1_dilations_0, groups = query_states_1_groups_0, pad = query_states_1_pad_0, pad_type = query_states_1_pad_type_0, strides = query_states_1_strides_0, weight = model_model_layers_7_self_attn_q_proj_weight_palettized, x = var_260)[name = string("query_states_1")];
            string key_states_1_pad_type_0 = const()[name = string("key_states_1_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> key_states_1_strides_0 = const()[name = string("key_states_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> key_states_1_pad_0 = const()[name = string("key_states_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> key_states_1_dilations_0 = const()[name = string("key_states_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 key_states_1_groups_0 = const()[name = string("key_states_1_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 64]> key_states_1 = conv(dilations = key_states_1_dilations_0, groups = key_states_1_groups_0, pad = key_states_1_pad_0, pad_type = key_states_1_pad_type_0, strides = key_states_1_strides_0, weight = model_model_layers_7_self_attn_k_proj_weight_palettized, x = var_260)[name = string("key_states_1")];
            string value_states_1_pad_type_0 = const()[name = string("value_states_1_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> value_states_1_strides_0 = const()[name = string("value_states_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> value_states_1_pad_0 = const()[name = string("value_states_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> value_states_1_dilations_0 = const()[name = string("value_states_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 value_states_1_groups_0 = const()[name = string("value_states_1_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 64]> value_states_1 = conv(dilations = value_states_1_dilations_0, groups = value_states_1_groups_0, pad = value_states_1_pad_0, pad_type = value_states_1_pad_type_0, strides = value_states_1_strides_0, weight = model_model_layers_7_self_attn_v_proj_weight_palettized, x = var_260)[name = string("value_states_1")];
            tensor<int32, [4]> var_280 = const()[name = string("op_280"), val = tensor<int32, [4]>([1, 24, 128, 64])];
            tensor<fp16, [1, 24, 128, 64]> var_281 = reshape(shape = var_280, x = query_states_1)[name = string("op_281")];
            tensor<int32, [4]> var_282 = const()[name = string("op_282"), val = tensor<int32, [4]>([0, 1, 3, 2])];
            tensor<int32, [4]> var_284 = const()[name = string("op_284"), val = tensor<int32, [4]>([1, 8, 128, 64])];
            tensor<fp16, [1, 8, 128, 64]> var_285 = reshape(shape = var_284, x = key_states_1)[name = string("op_285")];
            tensor<int32, [4]> var_286 = const()[name = string("op_286"), val = tensor<int32, [4]>([0, 1, 3, 2])];
            tensor<int32, [4]> var_288 = const()[name = string("op_288"), val = tensor<int32, [4]>([1, 8, 128, 64])];
            tensor<fp16, [1, 8, 128, 64]> var_289 = reshape(shape = var_288, x = value_states_1)[name = string("op_289")];
            tensor<int32, [4]> var_290 = const()[name = string("op_290"), val = tensor<int32, [4]>([0, 1, 3, 2])];
            tensor<int32, [4]> var_292 = const()[name = string("op_292"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [4]> var_294 = const()[name = string("op_294"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [4]> x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor<int32, [4]>([1, 24, 64, 64])];
            tensor<bool, [4]> x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 24, 64, 128]> x_1 = transpose(perm = var_282, x = var_281)[name = string("transpose_49")];
            tensor<fp16, [1, 24, 64, 64]> x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = x_1)[name = string("x1_1")];
            tensor<int32, [4]> x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor<int32, [4]>([1, 24, 64, 128])];
            tensor<bool, [4]> x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 24, 64, 64]> x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = x_1)[name = string("x2_1")];
            tensor<int32, [4]> cos_7_begin_0 = const()[name = string("cos_7_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> cos_7_end_0 = const()[name = string("cos_7_end_0"), val = tensor<int32, [4]>([1, 1, 64, 64])];
            tensor<bool, [4]> cos_7_end_mask_0 = const()[name = string("cos_7_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 1, 64, 128]> cos_5 = transpose(perm = var_292, x = cos_1_cast_fp16)[name = string("transpose_48")];
            tensor<fp16, [1, 1, 64, 64]> cos_7 = slice_by_index(begin = cos_7_begin_0, end = cos_7_end_0, end_mask = cos_7_end_mask_0, x = cos_5)[name = string("cos_7")];
            tensor<int32, [4]> sin_7_begin_0 = const()[name = string("sin_7_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> sin_7_end_0 = const()[name = string("sin_7_end_0"), val = tensor<int32, [4]>([1, 1, 64, 64])];
            tensor<bool, [4]> sin_7_end_mask_0 = const()[name = string("sin_7_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 1, 64, 128]> sin_5 = transpose(perm = var_294, x = sin_1_cast_fp16)[name = string("transpose_47")];
            tensor<fp16, [1, 1, 64, 64]> sin_7 = slice_by_index(begin = sin_7_begin_0, end = sin_7_end_0, end_mask = sin_7_end_mask_0, x = sin_5)[name = string("sin_7")];
            tensor<fp16, [1, 24, 64, 64]> var_308 = mul(x = x1_1, y = cos_7)[name = string("op_308")];
            tensor<fp16, [1, 24, 64, 64]> var_309 = mul(x = x2_1, y = sin_7)[name = string("op_309")];
            tensor<fp16, [1, 24, 64, 64]> var_310 = sub(x = var_308, y = var_309)[name = string("op_310")];
            tensor<fp16, [1, 24, 64, 64]> var_311 = mul(x = x2_1, y = cos_7)[name = string("op_311")];
            tensor<fp16, [1, 24, 64, 64]> var_312 = mul(x = x1_1, y = sin_7)[name = string("op_312")];
            tensor<fp16, [1, 24, 64, 64]> var_313 = add(x = var_311, y = var_312)[name = string("op_313")];
            bool rotated_1_interleave_0 = const()[name = string("rotated_1_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 24, 64, 128]> rotated_1 = concat(axis = var_46, interleave = rotated_1_interleave_0, values = (var_310, var_313))[name = string("rotated_1")];
            tensor<int32, [4]> x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor<int32, [4]>([1, 8, 64, 64])];
            tensor<bool, [4]> x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 8, 64, 128]> x_5 = transpose(perm = var_286, x = var_285)[name = string("transpose_46")];
            tensor<fp16, [1, 8, 64, 64]> x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = x_5)[name = string("x1_3")];
            tensor<int32, [4]> x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor<int32, [4]>([1, 8, 64, 128])];
            tensor<bool, [4]> x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 8, 64, 64]> x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = x_5)[name = string("x2_3")];
            tensor<fp16, [1, 8, 64, 64]> var_329 = mul(x = x1_3, y = cos_7)[name = string("op_329")];
            tensor<fp16, [1, 8, 64, 64]> var_330 = mul(x = x2_3, y = sin_7)[name = string("op_330")];
            tensor<fp16, [1, 8, 64, 64]> var_331 = sub(x = var_329, y = var_330)[name = string("op_331")];
            tensor<fp16, [1, 8, 64, 64]> var_332 = mul(x = x2_3, y = cos_7)[name = string("op_332")];
            tensor<fp16, [1, 8, 64, 64]> var_333 = mul(x = x1_3, y = sin_7)[name = string("op_333")];
            tensor<fp16, [1, 8, 64, 64]> var_334 = add(x = var_332, y = var_333)[name = string("op_334")];
            bool rotated_3_interleave_0 = const()[name = string("rotated_3_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 8, 64, 128]> rotated_3 = concat(axis = var_46, interleave = rotated_3_interleave_0, values = (var_331, var_334))[name = string("rotated_3")];
            tensor<int32, [1]> seq_length_1 = const()[name = string("seq_length_1"), val = tensor<int32, [1]>([64])];
            tensor<int32, [1]> var_343 = add(x = current_pos, y = seq_length_1)[name = string("op_343")];
            tensor<fp16, [56, 8, 1024, 128]> read_state_0 = read_state(input = model_model_kv_cache_0)[name = string("read_state_0")];
            tensor<int32, [1]> expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor<int32, [1]>([7])];
            tensor<int32, [1]> expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor<int32, [1]>([8])];
            int32 concat_2_axis_0 = const()[name = string("concat_2_axis_0"), val = int32(0)];
            bool concat_2_interleave_0 = const()[name = string("concat_2_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_2 = concat(axis = concat_2_axis_0, interleave = concat_2_interleave_0, values = (expand_dims_0, expand_dims_1, current_pos, expand_dims_3))[name = string("concat_2")];
            tensor<int32, [1]> concat_3_values1_0 = const()[name = string("concat_3_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_3_values3_0 = const()[name = string("concat_3_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_3_axis_0 = const()[name = string("concat_3_axis_0"), val = int32(0)];
            bool concat_3_interleave_0 = const()[name = string("concat_3_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_3 = concat(axis = concat_3_axis_0, interleave = concat_3_interleave_0, values = (expand_dims_4, concat_3_values1_0, var_343, concat_3_values3_0))[name = string("concat_3")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_1_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_2, begin_mask = model_model_kv_cache_0_internal_tensor_assign_1_begin_mask_0, end = concat_3, end_mask = model_model_kv_cache_0_internal_tensor_assign_1_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_1_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_1_stride_0, update = rotated_3, x = read_state_0)[name = string("model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_1_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_14_write_state")];
            tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_14 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_14")];
            tensor<int32, [1]> expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor<int32, [1]>([35])];
            tensor<int32, [1]> expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor<int32, [1]>([36])];
            int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)];
            bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (expand_dims_6, expand_dims_7, current_pos, expand_dims_9))[name = string("concat_6")];
            tensor<int32, [1]> concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_7_values3_0 = const()[name = string("concat_7_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)];
            bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (expand_dims_10, concat_7_values1_0, var_343, concat_7_values3_0))[name = string("concat_7")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_2_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [1, 8, 64, 128]> value_states_3 = transpose(perm = var_290, x = var_289)[name = string("transpose_45")];
            tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_6, begin_mask = model_model_kv_cache_0_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = model_model_kv_cache_0_internal_tensor_assign_2_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_2_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_2_stride_0, update = value_states_3, x = coreml_update_state_14)[name = string("model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_2_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_15_write_state")];
            tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_15 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_15")];
            tensor<int32, [4]> var_357_begin_0 = const()[name = string("op_357_begin_0"), val = tensor<int32, [4]>([7, 0, 0, 0])];
            tensor<int32, [4]> var_357_end_0 = const()[name = string("op_357_end_0"), val = tensor<int32, [4]>([8, 8, 1024, 128])];
            tensor<bool, [4]> var_357_end_mask_0 = const()[name = string("op_357_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_357_cast_fp16 = slice_by_index(begin = var_357_begin_0, end = var_357_end_0, end_mask = var_357_end_mask_0, x = coreml_update_state_15)[name = string("op_357_cast_fp16")];
            tensor<int32, [1]> K_layer_cache_1_axes_0 = const()[name = string("K_layer_cache_1_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> K_layer_cache_1_cast_fp16 = squeeze(axes = K_layer_cache_1_axes_0, x = var_357_cast_fp16)[name = string("K_layer_cache_1_cast_fp16")];
            tensor<int32, [4]> var_359_begin_0 = const()[name = string("op_359_begin_0"), val = tensor<int32, [4]>([35, 0, 0, 0])];
            tensor<int32, [4]> var_359_end_0 = const()[name = string("op_359_end_0"), val = tensor<int32, [4]>([36, 8, 1024, 128])];
            tensor<bool, [4]> var_359_end_mask_0 = const()[name = string("op_359_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_359_cast_fp16 = slice_by_index(begin = var_359_begin_0, end = var_359_end_0, end_mask = var_359_end_mask_0, x = coreml_update_state_15)[name = string("op_359_cast_fp16")];
            tensor<int32, [1]> V_layer_cache_1_axes_0 = const()[name = string("V_layer_cache_1_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> V_layer_cache_1_cast_fp16 = squeeze(axes = V_layer_cache_1_axes_0, x = var_359_cast_fp16)[name = string("V_layer_cache_1_cast_fp16")];
            tensor<int32, [1]> x_11_axes_0 = const()[name = string("x_11_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_11_cast_fp16 = expand_dims(axes = x_11_axes_0, x = K_layer_cache_1_cast_fp16)[name = string("x_11_cast_fp16")];
            tensor<int32, [4]> var_368 = const()[name = string("op_368"), val = tensor<int32, [4]>([1, 3, 1, 1])];
            tensor<fp16, [8, 3, 1024, 128]> x_13_cast_fp16 = tile(reps = var_368, x = x_11_cast_fp16)[name = string("x_13_cast_fp16")];
            tensor<int32, [4]> var_372 = const()[name = string("op_372"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
            tensor<fp16, [1, 24, 1024, 128]> var_373_cast_fp16 = reshape(shape = var_372, x = x_13_cast_fp16)[name = string("op_373_cast_fp16")];
            tensor<int32, [1]> x_17_axes_0 = const()[name = string("x_17_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_17_cast_fp16 = expand_dims(axes = x_17_axes_0, x = V_layer_cache_1_cast_fp16)[name = string("x_17_cast_fp16")];
            tensor<int32, [4]> var_375 = const()[name = string("op_375"), val = tensor<int32, [4]>([1, 3, 1, 1])];
            tensor<fp16, [8, 3, 1024, 128]> x_19_cast_fp16 = tile(reps = var_375, x = x_17_cast_fp16)[name = string("x_19_cast_fp16")];
            bool var_382_transpose_x_0 = const()[name = string("op_382_transpose_x_0"), val = bool(false)];
            bool var_382_transpose_y_0 = const()[name = string("op_382_transpose_y_0"), val = bool(true)];
            tensor<fp16, [1, 24, 64, 1024]> var_382_cast_fp16 = matmul(transpose_x = var_382_transpose_x_0, transpose_y = var_382_transpose_y_0, x = rotated_1, y = var_373_cast_fp16)[name = string("op_382_cast_fp16")];
            fp16 var_383_to_fp16 = const()[name = string("op_383_to_fp16"), val = fp16(0x1.6ap-4)];
            tensor<fp16, [1, 24, 64, 1024]> attn_weights_1_cast_fp16 = mul(x = var_382_cast_fp16, y = var_383_to_fp16)[name = string("attn_weights_1_cast_fp16")];
            tensor<fp16, [1, 24, 64, 1024]> x_21_cast_fp16 = add(x = attn_weights_1_cast_fp16, y = causal_mask)[name = string("x_21_cast_fp16")];
            tensor<int32, [1]> reduce_max_0_axes_0 = const()[name = string("reduce_max_0_axes_0"), val = tensor<int32, [1]>([-1])];
            bool reduce_max_0_keep_dims_0 = const()[name = string("reduce_max_0_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 24, 64, 1]> reduce_max_0_cast_fp16 = reduce_max(axes = reduce_max_0_axes_0, keep_dims = reduce_max_0_keep_dims_0, x = x_21_cast_fp16)[name = string("reduce_max_0_cast_fp16")];
            tensor<fp16, [1, 24, 64, 1024]> x_23_cast_fp16 = sub(x = x_21_cast_fp16, y = reduce_max_0_cast_fp16)[name = string("x_23_cast_fp16")];
            tensor<fp16, [1, 24, 64, 1024]> exp_x_1_cast_fp16 = exp(x = x_23_cast_fp16)[name = string("exp_x_1_cast_fp16")];
            tensor<int32, [1]> var_394_axes_0 = const()[name = string("op_394_axes_0"), val = tensor<int32, [1]>([-1])];
            bool var_394_keep_dims_0 = const()[name = string("op_394_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 24, 64, 1]> var_394_cast_fp16 = reduce_sum(axes = var_394_axes_0, keep_dims = var_394_keep_dims_0, x = exp_x_1_cast_fp16)[name = string("op_394_cast_fp16")];
            tensor<fp16, [1, 24, 64, 1024]> var_395_cast_fp16 = real_div(x = exp_x_1_cast_fp16, y = var_394_cast_fp16)[name = string("op_395_cast_fp16")];
            tensor<int32, [3]> concat_12 = const()[name = string("concat_12"), val = tensor<int32, [3]>([24, 64, 1024])];
            tensor<fp16, [24, 64, 1024]> reshape_0_cast_fp16 = reshape(shape = concat_12, x = var_395_cast_fp16)[name = string("reshape_0_cast_fp16")];
            tensor<int32, [3]> concat_13 = const()[name = string("concat_13"), val = tensor<int32, [3]>([24, 1024, 128])];
            tensor<fp16, [24, 1024, 128]> reshape_1_cast_fp16 = reshape(shape = concat_13, x = x_19_cast_fp16)[name = string("reshape_1_cast_fp16")];
            bool matmul_0_transpose_x_0 = const()[name = string("matmul_0_transpose_x_0"), val = bool(false)];
            bool matmul_0_transpose_y_0 = const()[name = string("matmul_0_transpose_y_0"), val = bool(false)];
            tensor<fp16, [24, 64, 128]> matmul_0_cast_fp16 = matmul(transpose_x = matmul_0_transpose_x_0, transpose_y = matmul_0_transpose_y_0, x = reshape_0_cast_fp16, y = reshape_1_cast_fp16)[name = string("matmul_0_cast_fp16")];
            tensor<int32, [4]> concat_17 = const()[name = string("concat_17"), val = tensor<int32, [4]>([1, 24, 64, 128])];
            tensor<fp16, [1, 24, 64, 128]> reshape_2_cast_fp16 = reshape(shape = concat_17, x = matmul_0_cast_fp16)[name = string("reshape_2_cast_fp16")];
            tensor<int32, [4]> var_398_perm_0 = const()[name = string("op_398_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_400 = const()[name = string("op_400"), val = tensor<int32, [3]>([1, 64, 3072])];
            tensor<fp16, [1, 64, 24, 128]> var_398_cast_fp16 = transpose(perm = var_398_perm_0, x = reshape_2_cast_fp16)[name = string("transpose_44")];
            tensor<fp16, [1, 64, 3072]> input_5_cast_fp16 = reshape(shape = var_400, x = var_398_cast_fp16)[name = string("input_5_cast_fp16")];
            tensor<fp16, [3072, 3072]> model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(716713472))), lut = tensor<fp16, [384, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(726150720))))[name = string("model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
            tensor<fp16, [3072]> linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(726347392)))];
            tensor<fp16, [1, 64, 3072]> linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_7_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_5_cast_fp16)[name = string("linear_0_cast_fp16")];
            tensor<fp16, [1, 64, 3072]> hidden_states_5_cast_fp16 = add(x = hidden_states, y = linear_0_cast_fp16)[name = string("hidden_states_5_cast_fp16")];
            tensor<int32, [1]> mean_3_axes_0 = const()[name = string("mean_3_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_3_keep_dims_0 = const()[name = string("mean_3_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 64, 1]> mean_3_cast_fp16 = reduce_mean(axes = mean_3_axes_0, keep_dims = mean_3_keep_dims_0, x = hidden_states_5_cast_fp16)[name = string("mean_3_cast_fp16")];
            tensor<fp16, [1, 64, 3072]> input_7_cast_fp16 = sub(x = hidden_states_5_cast_fp16, y = mean_3_cast_fp16)[name = string("input_7_cast_fp16")];
            tensor<int32, [1]> var_411_axes_0 = const()[name = string("op_411_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [3072]> model_model_layers_7_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(726353600)))];
            tensor<fp16, [1, 64, 3072]> var_411_cast_fp16 = layer_norm(axes = var_411_axes_0, epsilon = var_48_to_fp16, gamma = model_model_layers_7_post_attention_layernorm_weight_to_fp16, x = input_7_cast_fp16)[name = string("op_411_cast_fp16")];
            tensor<int32, [3]> var_418 = const()[name = string("op_418"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> input_9_axes_0 = const()[name = string("input_9_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 64]> var_419 = transpose(perm = var_418, x = var_411_cast_fp16)[name = string("transpose_43")];
            tensor<fp16, [1, 3072, 1, 64]> input_9 = expand_dims(axes = input_9_axes_0, x = var_419)[name = string("input_9")];
            string input_11_pad_type_0 = const()[name = string("input_11_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> input_11_strides_0 = const()[name = string("input_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> input_11_pad_0 = const()[name = string("input_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> input_11_dilations_0 = const()[name = string("input_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 input_11_groups_0 = const()[name = string("input_11_groups_0"), val = int32(1)];
            tensor<fp16, [1, 8192, 1, 64]> input_11 = conv(dilations = input_11_dilations_0, groups = input_11_groups_0, pad = input_11_pad_0, pad_type = input_11_pad_type_0, strides = input_11_strides_0, weight = model_model_layers_7_mlp_gate_proj_weight_palettized, x = input_9)[name = string("input_11")];
            string up_states_1_pad_type_0 = const()[name = string("up_states_1_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> up_states_1_strides_0 = const()[name = string("up_states_1_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> up_states_1_pad_0 = const()[name = string("up_states_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> up_states_1_dilations_0 = const()[name = string("up_states_1_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 up_states_1_groups_0 = const()[name = string("up_states_1_groups_0"), val = int32(1)];
            tensor<fp16, [1, 8192, 1, 64]> up_states_1 = conv(dilations = up_states_1_dilations_0, groups = up_states_1_groups_0, pad = up_states_1_pad_0, pad_type = up_states_1_pad_type_0, strides = up_states_1_strides_0, weight = model_model_layers_7_mlp_up_proj_weight_palettized, x = input_9)[name = string("up_states_1")];
            tensor<fp16, [1, 8192, 1, 64]> gate_states_1 = silu(x = input_11)[name = string("gate_states_1")];
            tensor<fp16, [1, 8192, 1, 64]> input_13 = mul(x = gate_states_1, y = up_states_1)[name = string("input_13")];
            string hidden_states_7_pad_type_0 = const()[name = string("hidden_states_7_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> hidden_states_7_strides_0 = const()[name = string("hidden_states_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> hidden_states_7_pad_0 = const()[name = string("hidden_states_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> hidden_states_7_dilations_0 = const()[name = string("hidden_states_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 hidden_states_7_groups_0 = const()[name = string("hidden_states_7_groups_0"), val = int32(1)];
            tensor<fp16, [1, 3072, 1, 64]> hidden_states_7 = conv(dilations = hidden_states_7_dilations_0, groups = hidden_states_7_groups_0, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = hidden_states_7_strides_0, weight = model_model_layers_7_mlp_down_proj_weight_palettized, x = input_13)[name = string("hidden_states_7")];
            tensor<int32, [1]> var_441_axes_0 = const()[name = string("op_441_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 64]> var_441 = squeeze(axes = var_441_axes_0, x = hidden_states_7)[name = string("op_441")];
            tensor<int32, [3]> var_442 = const()[name = string("op_442"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 64, 3072]> var_443 = transpose(perm = var_442, x = var_441)[name = string("transpose_42")];
            tensor<fp16, [1, 64, 3072]> hidden_states_9_cast_fp16 = add(x = hidden_states_5_cast_fp16, y = var_443)[name = string("hidden_states_9_cast_fp16")];
            tensor<int32, [1]> mean_5_axes_0 = const()[name = string("mean_5_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_5_keep_dims_0 = const()[name = string("mean_5_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 64, 1]> mean_5_cast_fp16 = reduce_mean(axes = mean_5_axes_0, keep_dims = mean_5_keep_dims_0, x = hidden_states_9_cast_fp16)[name = string("mean_5_cast_fp16")];
            tensor<fp16, [1, 64, 3072]> input_15_cast_fp16 = sub(x = hidden_states_9_cast_fp16, y = mean_5_cast_fp16)[name = string("input_15_cast_fp16")];
            tensor<int32, [1]> var_451_axes_0 = const()[name = string("op_451_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [3072]> model_model_layers_8_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_input_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(726359808)))];
            tensor<fp16, [1, 64, 3072]> var_451_cast_fp16 = layer_norm(axes = var_451_axes_0, epsilon = var_48_to_fp16, gamma = model_model_layers_8_input_layernorm_weight_to_fp16, x = input_15_cast_fp16)[name = string("op_451_cast_fp16")];
            tensor<int32, [3]> var_455 = const()[name = string("op_455"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> var_457_axes_0 = const()[name = string("op_457_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 64]> var_456 = transpose(perm = var_455, x = var_451_cast_fp16)[name = string("transpose_41")];
            tensor<fp16, [1, 3072, 1, 64]> var_457 = expand_dims(axes = var_457_axes_0, x = var_456)[name = string("op_457")];
            string query_states_5_pad_type_0 = const()[name = string("query_states_5_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> query_states_5_strides_0 = const()[name = string("query_states_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> query_states_5_pad_0 = const()[name = string("query_states_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> query_states_5_dilations_0 = const()[name = string("query_states_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 query_states_5_groups_0 = const()[name = string("query_states_5_groups_0"), val = int32(1)];
            tensor<fp16, [1, 3072, 1, 64]> query_states_5 = conv(dilations = query_states_5_dilations_0, groups = query_states_5_groups_0, pad = query_states_5_pad_0, pad_type = query_states_5_pad_type_0, strides = query_states_5_strides_0, weight = model_model_layers_8_self_attn_q_proj_weight_palettized, x = var_457)[name = string("query_states_5")];
            string key_states_7_pad_type_0 = const()[name = string("key_states_7_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> key_states_7_strides_0 = const()[name = string("key_states_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> key_states_7_pad_0 = const()[name = string("key_states_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> key_states_7_dilations_0 = const()[name = string("key_states_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 key_states_7_groups_0 = const()[name = string("key_states_7_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 64]> key_states_7 = conv(dilations = key_states_7_dilations_0, groups = key_states_7_groups_0, pad = key_states_7_pad_0, pad_type = key_states_7_pad_type_0, strides = key_states_7_strides_0, weight = model_model_layers_8_self_attn_k_proj_weight_palettized, x = var_457)[name = string("key_states_7")];
            string value_states_7_pad_type_0 = const()[name = string("value_states_7_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> value_states_7_strides_0 = const()[name = string("value_states_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> value_states_7_pad_0 = const()[name = string("value_states_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> value_states_7_dilations_0 = const()[name = string("value_states_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 value_states_7_groups_0 = const()[name = string("value_states_7_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 64]> value_states_7 = conv(dilations = value_states_7_dilations_0, groups = value_states_7_groups_0, pad = value_states_7_pad_0, pad_type = value_states_7_pad_type_0, strides = value_states_7_strides_0, weight = model_model_layers_8_self_attn_v_proj_weight_palettized, x = var_457)[name = string("value_states_7")];
            tensor<int32, [4]> var_477 = const()[name = string("op_477"), val = tensor<int32, [4]>([1, 24, 128, 64])];
            tensor<fp16, [1, 24, 128, 64]> var_478 = reshape(shape = var_477, x = query_states_5)[name = string("op_478")];
            tensor<int32, [4]> var_479 = const()[name = string("op_479"), val = tensor<int32, [4]>([0, 1, 3, 2])];
            tensor<int32, [4]> var_481 = const()[name = string("op_481"), val = tensor<int32, [4]>([1, 8, 128, 64])];
            tensor<fp16, [1, 8, 128, 64]> var_482 = reshape(shape = var_481, x = key_states_7)[name = string("op_482")];
            tensor<int32, [4]> var_483 = const()[name = string("op_483"), val = tensor<int32, [4]>([0, 1, 3, 2])];
            tensor<int32, [4]> var_485 = const()[name = string("op_485"), val = tensor<int32, [4]>([1, 8, 128, 64])];
            tensor<fp16, [1, 8, 128, 64]> var_486 = reshape(shape = var_485, x = value_states_7)[name = string("op_486")];
            tensor<int32, [4]> var_487 = const()[name = string("op_487"), val = tensor<int32, [4]>([0, 1, 3, 2])];
            tensor<int32, [4]> x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor<int32, [4]>([1, 24, 64, 64])];
            tensor<bool, [4]> x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 24, 64, 128]> x_29 = transpose(perm = var_479, x = var_478)[name = string("transpose_40")];
            tensor<fp16, [1, 24, 64, 64]> x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = x_29)[name = string("x1_5")];
            tensor<int32, [4]> x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor<int32, [4]>([1, 24, 64, 128])];
            tensor<bool, [4]> x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 24, 64, 64]> x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = x_29)[name = string("x2_5")];
            tensor<fp16, [1, 24, 64, 64]> var_505 = mul(x = x1_5, y = cos_7)[name = string("op_505")];
            tensor<fp16, [1, 24, 64, 64]> var_506 = mul(x = x2_5, y = sin_7)[name = string("op_506")];
            tensor<fp16, [1, 24, 64, 64]> var_507 = sub(x = var_505, y = var_506)[name = string("op_507")];
            tensor<fp16, [1, 24, 64, 64]> var_508 = mul(x = x2_5, y = cos_7)[name = string("op_508")];
            tensor<fp16, [1, 24, 64, 64]> var_509 = mul(x = x1_5, y = sin_7)[name = string("op_509")];
            tensor<fp16, [1, 24, 64, 64]> var_510 = add(x = var_508, y = var_509)[name = string("op_510")];
            bool rotated_5_interleave_0 = const()[name = string("rotated_5_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 24, 64, 128]> rotated_5 = concat(axis = var_46, interleave = rotated_5_interleave_0, values = (var_507, var_510))[name = string("rotated_5")];
            tensor<int32, [4]> x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor<int32, [4]>([1, 8, 64, 64])];
            tensor<bool, [4]> x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 8, 64, 128]> x_33 = transpose(perm = var_483, x = var_482)[name = string("transpose_39")];
            tensor<fp16, [1, 8, 64, 64]> x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = x_33)[name = string("x1_7")];
            tensor<int32, [4]> x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor<int32, [4]>([1, 8, 64, 128])];
            tensor<bool, [4]> x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 8, 64, 64]> x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = x_33)[name = string("x2_7")];
            tensor<fp16, [1, 8, 64, 64]> var_526 = mul(x = x1_7, y = cos_7)[name = string("op_526")];
            tensor<fp16, [1, 8, 64, 64]> var_527 = mul(x = x2_7, y = sin_7)[name = string("op_527")];
            tensor<fp16, [1, 8, 64, 64]> var_528 = sub(x = var_526, y = var_527)[name = string("op_528")];
            tensor<fp16, [1, 8, 64, 64]> var_529 = mul(x = x2_7, y = cos_7)[name = string("op_529")];
            tensor<fp16, [1, 8, 64, 64]> var_530 = mul(x = x1_7, y = sin_7)[name = string("op_530")];
            tensor<fp16, [1, 8, 64, 64]> var_531 = add(x = var_529, y = var_530)[name = string("op_531")];
            bool rotated_7_interleave_0 = const()[name = string("rotated_7_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 8, 64, 128]> rotated_7 = concat(axis = var_46, interleave = rotated_7_interleave_0, values = (var_528, var_531))[name = string("rotated_7")];
            tensor<int32, [1]> expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor<int32, [1]>([8])];
            tensor<int32, [1]> expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_16 = const()[name = string("expand_dims_16"), val = tensor<int32, [1]>([9])];
            int32 concat_20_axis_0 = const()[name = string("concat_20_axis_0"), val = int32(0)];
            bool concat_20_interleave_0 = const()[name = string("concat_20_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_20 = concat(axis = concat_20_axis_0, interleave = concat_20_interleave_0, values = (expand_dims_12, expand_dims_13, current_pos, expand_dims_15))[name = string("concat_20")];
            tensor<int32, [1]> concat_21_values1_0 = const()[name = string("concat_21_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_21_values3_0 = const()[name = string("concat_21_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_21_axis_0 = const()[name = string("concat_21_axis_0"), val = int32(0)];
            bool concat_21_interleave_0 = const()[name = string("concat_21_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_21 = concat(axis = concat_21_axis_0, interleave = concat_21_interleave_0, values = (expand_dims_16, concat_21_values1_0, var_343, concat_21_values3_0))[name = string("concat_21")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_3_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_20, begin_mask = model_model_kv_cache_0_internal_tensor_assign_3_begin_mask_0, end = concat_21, end_mask = model_model_kv_cache_0_internal_tensor_assign_3_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_3_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_3_stride_0, update = rotated_7, x = coreml_update_state_15)[name = string("model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_3_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_16_write_state")];
            tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_16 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_16")];
            tensor<int32, [1]> expand_dims_18 = const()[name = string("expand_dims_18"), val = tensor<int32, [1]>([36])];
            tensor<int32, [1]> expand_dims_19 = const()[name = string("expand_dims_19"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_21 = const()[name = string("expand_dims_21"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_22 = const()[name = string("expand_dims_22"), val = tensor<int32, [1]>([37])];
            int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)];
            bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (expand_dims_18, expand_dims_19, current_pos, expand_dims_21))[name = string("concat_24")];
            tensor<int32, [1]> concat_25_values1_0 = const()[name = string("concat_25_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_25_values3_0 = const()[name = string("concat_25_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_25_axis_0 = const()[name = string("concat_25_axis_0"), val = int32(0)];
            bool concat_25_interleave_0 = const()[name = string("concat_25_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_25 = concat(axis = concat_25_axis_0, interleave = concat_25_interleave_0, values = (expand_dims_22, concat_25_values1_0, var_343, concat_25_values3_0))[name = string("concat_25")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_4_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [1, 8, 64, 128]> value_states_9 = transpose(perm = var_487, x = var_486)[name = string("transpose_38")];
            tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_24, begin_mask = model_model_kv_cache_0_internal_tensor_assign_4_begin_mask_0, end = concat_25, end_mask = model_model_kv_cache_0_internal_tensor_assign_4_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_4_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_4_stride_0, update = value_states_9, x = coreml_update_state_16)[name = string("model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_4_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_17_write_state")];
            tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_17 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_17")];
            tensor<int32, [4]> var_554_begin_0 = const()[name = string("op_554_begin_0"), val = tensor<int32, [4]>([8, 0, 0, 0])];
            tensor<int32, [4]> var_554_end_0 = const()[name = string("op_554_end_0"), val = tensor<int32, [4]>([9, 8, 1024, 128])];
            tensor<bool, [4]> var_554_end_mask_0 = const()[name = string("op_554_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_554_cast_fp16 = slice_by_index(begin = var_554_begin_0, end = var_554_end_0, end_mask = var_554_end_mask_0, x = coreml_update_state_17)[name = string("op_554_cast_fp16")];
            tensor<int32, [1]> K_layer_cache_3_axes_0 = const()[name = string("K_layer_cache_3_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> K_layer_cache_3_cast_fp16 = squeeze(axes = K_layer_cache_3_axes_0, x = var_554_cast_fp16)[name = string("K_layer_cache_3_cast_fp16")];
            tensor<int32, [4]> var_556_begin_0 = const()[name = string("op_556_begin_0"), val = tensor<int32, [4]>([36, 0, 0, 0])];
            tensor<int32, [4]> var_556_end_0 = const()[name = string("op_556_end_0"), val = tensor<int32, [4]>([37, 8, 1024, 128])];
            tensor<bool, [4]> var_556_end_mask_0 = const()[name = string("op_556_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_556_cast_fp16 = slice_by_index(begin = var_556_begin_0, end = var_556_end_0, end_mask = var_556_end_mask_0, x = coreml_update_state_17)[name = string("op_556_cast_fp16")];
            tensor<int32, [1]> V_layer_cache_3_axes_0 = const()[name = string("V_layer_cache_3_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> V_layer_cache_3_cast_fp16 = squeeze(axes = V_layer_cache_3_axes_0, x = var_556_cast_fp16)[name = string("V_layer_cache_3_cast_fp16")];
            tensor<int32, [1]> x_39_axes_0 = const()[name = string("x_39_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_39_cast_fp16 = expand_dims(axes = x_39_axes_0, x = K_layer_cache_3_cast_fp16)[name = string("x_39_cast_fp16")];
            tensor<int32, [4]> var_565 = const()[name = string("op_565"), val = tensor<int32, [4]>([1, 3, 1, 1])];
            tensor<fp16, [8, 3, 1024, 128]> x_41_cast_fp16 = tile(reps = var_565, x = x_39_cast_fp16)[name = string("x_41_cast_fp16")];
            tensor<int32, [4]> var_569 = const()[name = string("op_569"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
            tensor<fp16, [1, 24, 1024, 128]> var_570_cast_fp16 = reshape(shape = var_569, x = x_41_cast_fp16)[name = string("op_570_cast_fp16")];
            tensor<int32, [1]> x_45_axes_0 = const()[name = string("x_45_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_45_cast_fp16 = expand_dims(axes = x_45_axes_0, x = V_layer_cache_3_cast_fp16)[name = string("x_45_cast_fp16")];
            tensor<int32, [4]> var_572 = const()[name = string("op_572"), val = tensor<int32, [4]>([1, 3, 1, 1])];
            tensor<fp16, [8, 3, 1024, 128]> x_47_cast_fp16 = tile(reps = var_572, x = x_45_cast_fp16)[name = string("x_47_cast_fp16")];
            bool var_579_transpose_x_0 = const()[name = string("op_579_transpose_x_0"), val = bool(false)];
            bool var_579_transpose_y_0 = const()[name = string("op_579_transpose_y_0"), val = bool(true)];
            tensor<fp16, [1, 24, 64, 1024]> var_579_cast_fp16 = matmul(transpose_x = var_579_transpose_x_0, transpose_y = var_579_transpose_y_0, x = rotated_5, y = var_570_cast_fp16)[name = string("op_579_cast_fp16")];
            fp16 var_580_to_fp16 = const()[name = string("op_580_to_fp16"), val = fp16(0x1.6ap-4)];
            tensor<fp16, [1, 24, 64, 1024]> attn_weights_3_cast_fp16 = mul(x = var_579_cast_fp16, y = var_580_to_fp16)[name = string("attn_weights_3_cast_fp16")];
            tensor<fp16, [1, 24, 64, 1024]> x_49_cast_fp16 = add(x = attn_weights_3_cast_fp16, y = causal_mask)[name = string("x_49_cast_fp16")];
            tensor<int32, [1]> reduce_max_1_axes_0 = const()[name = string("reduce_max_1_axes_0"), val = tensor<int32, [1]>([-1])];
            bool reduce_max_1_keep_dims_0 = const()[name = string("reduce_max_1_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 24, 64, 1]> reduce_max_1_cast_fp16 = reduce_max(axes = reduce_max_1_axes_0, keep_dims = reduce_max_1_keep_dims_0, x = x_49_cast_fp16)[name = string("reduce_max_1_cast_fp16")];
            tensor<fp16, [1, 24, 64, 1024]> x_51_cast_fp16 = sub(x = x_49_cast_fp16, y = reduce_max_1_cast_fp16)[name = string("x_51_cast_fp16")];
            tensor<fp16, [1, 24, 64, 1024]> exp_x_3_cast_fp16 = exp(x = x_51_cast_fp16)[name = string("exp_x_3_cast_fp16")];
            tensor<int32, [1]> var_591_axes_0 = const()[name = string("op_591_axes_0"), val = tensor<int32, [1]>([-1])];
            bool var_591_keep_dims_0 = const()[name = string("op_591_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 24, 64, 1]> var_591_cast_fp16 = reduce_sum(axes = var_591_axes_0, keep_dims = var_591_keep_dims_0, x = exp_x_3_cast_fp16)[name = string("op_591_cast_fp16")];
            tensor<fp16, [1, 24, 64, 1024]> var_592_cast_fp16 = real_div(x = exp_x_3_cast_fp16, y = var_591_cast_fp16)[name = string("op_592_cast_fp16")];
            tensor<int32, [3]> concat_30 = const()[name = string("concat_30"), val = tensor<int32, [3]>([24, 64, 1024])];
            tensor<fp16, [24, 64, 1024]> reshape_3_cast_fp16 = reshape(shape = concat_30, x = var_592_cast_fp16)[name = string("reshape_3_cast_fp16")];
            tensor<int32, [3]> concat_31 = const()[name = string("concat_31"), val = tensor<int32, [3]>([24, 1024, 128])];
            tensor<fp16, [24, 1024, 128]> reshape_4_cast_fp16 = reshape(shape = concat_31, x = x_47_cast_fp16)[name = string("reshape_4_cast_fp16")];
            bool matmul_1_transpose_x_0 = const()[name = string("matmul_1_transpose_x_0"), val = bool(false)];
            bool matmul_1_transpose_y_0 = const()[name = string("matmul_1_transpose_y_0"), val = bool(false)];
            tensor<fp16, [24, 64, 128]> matmul_1_cast_fp16 = matmul(transpose_x = matmul_1_transpose_x_0, transpose_y = matmul_1_transpose_y_0, x = reshape_3_cast_fp16, y = reshape_4_cast_fp16)[name = string("matmul_1_cast_fp16")];
            tensor<int32, [4]> concat_35 = const()[name = string("concat_35"), val = tensor<int32, [4]>([1, 24, 64, 128])];
            tensor<fp16, [1, 24, 64, 128]> reshape_5_cast_fp16 = reshape(shape = concat_35, x = matmul_1_cast_fp16)[name = string("reshape_5_cast_fp16")];
            tensor<int32, [4]> var_595_perm_0 = const()[name = string("op_595_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_597 = const()[name = string("op_597"), val = tensor<int32, [3]>([1, 64, 3072])];
            tensor<fp16, [1, 64, 24, 128]> var_595_cast_fp16 = transpose(perm = var_595_perm_0, x = reshape_5_cast_fp16)[name = string("transpose_37")];
            tensor<fp16, [1, 64, 3072]> input_19_cast_fp16 = reshape(shape = var_597, x = var_595_cast_fp16)[name = string("input_19_cast_fp16")];
            tensor<fp16, [3072, 3072]> model_model_layers_8_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(726366016))), lut = tensor<fp16, [384, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735803264))))[name = string("model_model_layers_8_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
            tensor<fp16, [1, 64, 3072]> linear_1_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_8_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_19_cast_fp16)[name = string("linear_1_cast_fp16")];
            tensor<fp16, [1, 64, 3072]> hidden_states_13_cast_fp16 = add(x = hidden_states_9_cast_fp16, y = linear_1_cast_fp16)[name = string("hidden_states_13_cast_fp16")];
            tensor<int32, [1]> mean_7_axes_0 = const()[name = string("mean_7_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_7_keep_dims_0 = const()[name = string("mean_7_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 64, 1]> mean_7_cast_fp16 = reduce_mean(axes = mean_7_axes_0, keep_dims = mean_7_keep_dims_0, x = hidden_states_13_cast_fp16)[name = string("mean_7_cast_fp16")];
            tensor<fp16, [1, 64, 3072]> input_21_cast_fp16 = sub(x = hidden_states_13_cast_fp16, y = mean_7_cast_fp16)[name = string("input_21_cast_fp16")];
            tensor<int32, [1]> var_608_axes_0 = const()[name = string("op_608_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [3072]> model_model_layers_8_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735999936)))];
            tensor<fp16, [1, 64, 3072]> var_608_cast_fp16 = layer_norm(axes = var_608_axes_0, epsilon = var_48_to_fp16, gamma = model_model_layers_8_post_attention_layernorm_weight_to_fp16, x = input_21_cast_fp16)[name = string("op_608_cast_fp16")];
            tensor<int32, [3]> var_615 = const()[name = string("op_615"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> input_23_axes_0 = const()[name = string("input_23_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 64]> var_616 = transpose(perm = var_615, x = var_608_cast_fp16)[name = string("transpose_36")];
            tensor<fp16, [1, 3072, 1, 64]> input_23 = expand_dims(axes = input_23_axes_0, x = var_616)[name = string("input_23")];
            string input_25_pad_type_0 = const()[name = string("input_25_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> input_25_strides_0 = const()[name = string("input_25_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> input_25_pad_0 = const()[name = string("input_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> input_25_dilations_0 = const()[name = string("input_25_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 input_25_groups_0 = const()[name = string("input_25_groups_0"), val = int32(1)];
            tensor<fp16, [1, 8192, 1, 64]> input_25 = conv(dilations = input_25_dilations_0, groups = input_25_groups_0, pad = input_25_pad_0, pad_type = input_25_pad_type_0, strides = input_25_strides_0, weight = model_model_layers_8_mlp_gate_proj_weight_palettized, x = input_23)[name = string("input_25")];
            string up_states_3_pad_type_0 = const()[name = string("up_states_3_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> up_states_3_strides_0 = const()[name = string("up_states_3_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> up_states_3_pad_0 = const()[name = string("up_states_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> up_states_3_dilations_0 = const()[name = string("up_states_3_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 up_states_3_groups_0 = const()[name = string("up_states_3_groups_0"), val = int32(1)];
            tensor<fp16, [1, 8192, 1, 64]> up_states_3 = conv(dilations = up_states_3_dilations_0, groups = up_states_3_groups_0, pad = up_states_3_pad_0, pad_type = up_states_3_pad_type_0, strides = up_states_3_strides_0, weight = model_model_layers_8_mlp_up_proj_weight_palettized, x = input_23)[name = string("up_states_3")];
            tensor<fp16, [1, 8192, 1, 64]> gate_states_3 = silu(x = input_25)[name = string("gate_states_3")];
            tensor<fp16, [1, 8192, 1, 64]> input_27 = mul(x = gate_states_3, y = up_states_3)[name = string("input_27")];
            string hidden_states_15_pad_type_0 = const()[name = string("hidden_states_15_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> hidden_states_15_strides_0 = const()[name = string("hidden_states_15_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> hidden_states_15_pad_0 = const()[name = string("hidden_states_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> hidden_states_15_dilations_0 = const()[name = string("hidden_states_15_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 hidden_states_15_groups_0 = const()[name = string("hidden_states_15_groups_0"), val = int32(1)];
            tensor<fp16, [1, 3072, 1, 64]> hidden_states_15 = conv(dilations = hidden_states_15_dilations_0, groups = hidden_states_15_groups_0, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = hidden_states_15_strides_0, weight = model_model_layers_8_mlp_down_proj_weight_palettized, x = input_27)[name = string("hidden_states_15")];
            tensor<int32, [1]> var_638_axes_0 = const()[name = string("op_638_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 64]> var_638 = squeeze(axes = var_638_axes_0, x = hidden_states_15)[name = string("op_638")];
            tensor<int32, [3]> var_639 = const()[name = string("op_639"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 64, 3072]> var_640 = transpose(perm = var_639, x = var_638)[name = string("transpose_35")];
            tensor<fp16, [1, 64, 3072]> hidden_states_17_cast_fp16 = add(x = hidden_states_13_cast_fp16, y = var_640)[name = string("hidden_states_17_cast_fp16")];
            tensor<int32, [1]> mean_9_axes_0 = const()[name = string("mean_9_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_9_keep_dims_0 = const()[name = string("mean_9_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 64, 1]> mean_9_cast_fp16 = reduce_mean(axes = mean_9_axes_0, keep_dims = mean_9_keep_dims_0, x = hidden_states_17_cast_fp16)[name = string("mean_9_cast_fp16")];
            tensor<fp16, [1, 64, 3072]> input_29_cast_fp16 = sub(x = hidden_states_17_cast_fp16, y = mean_9_cast_fp16)[name = string("input_29_cast_fp16")];
            tensor<int32, [1]> var_648_axes_0 = const()[name = string("op_648_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [3072]> model_model_layers_9_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_input_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736006144)))];
            tensor<fp16, [1, 64, 3072]> var_648_cast_fp16 = layer_norm(axes = var_648_axes_0, epsilon = var_48_to_fp16, gamma = model_model_layers_9_input_layernorm_weight_to_fp16, x = input_29_cast_fp16)[name = string("op_648_cast_fp16")];
            tensor<int32, [3]> var_652 = const()[name = string("op_652"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> var_654_axes_0 = const()[name = string("op_654_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 64]> var_653 = transpose(perm = var_652, x = var_648_cast_fp16)[name = string("transpose_34")];
            tensor<fp16, [1, 3072, 1, 64]> var_654 = expand_dims(axes = var_654_axes_0, x = var_653)[name = string("op_654")];
            string query_states_9_pad_type_0 = const()[name = string("query_states_9_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> query_states_9_strides_0 = const()[name = string("query_states_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> query_states_9_pad_0 = const()[name = string("query_states_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> query_states_9_dilations_0 = const()[name = string("query_states_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 query_states_9_groups_0 = const()[name = string("query_states_9_groups_0"), val = int32(1)];
            tensor<fp16, [1, 3072, 1, 64]> query_states_9 = conv(dilations = query_states_9_dilations_0, groups = query_states_9_groups_0, pad = query_states_9_pad_0, pad_type = query_states_9_pad_type_0, strides = query_states_9_strides_0, weight = model_model_layers_9_self_attn_q_proj_weight_palettized, x = var_654)[name = string("query_states_9")];
            string key_states_13_pad_type_0 = const()[name = string("key_states_13_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> key_states_13_strides_0 = const()[name = string("key_states_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> key_states_13_pad_0 = const()[name = string("key_states_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> key_states_13_dilations_0 = const()[name = string("key_states_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 key_states_13_groups_0 = const()[name = string("key_states_13_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 64]> key_states_13 = conv(dilations = key_states_13_dilations_0, groups = key_states_13_groups_0, pad = key_states_13_pad_0, pad_type = key_states_13_pad_type_0, strides = key_states_13_strides_0, weight = model_model_layers_9_self_attn_k_proj_weight_palettized, x = var_654)[name = string("key_states_13")];
            string value_states_13_pad_type_0 = const()[name = string("value_states_13_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> value_states_13_strides_0 = const()[name = string("value_states_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> value_states_13_pad_0 = const()[name = string("value_states_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> value_states_13_dilations_0 = const()[name = string("value_states_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 value_states_13_groups_0 = const()[name = string("value_states_13_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 64]> value_states_13 = conv(dilations = value_states_13_dilations_0, groups = value_states_13_groups_0, pad = value_states_13_pad_0, pad_type = value_states_13_pad_type_0, strides = value_states_13_strides_0, weight = model_model_layers_9_self_attn_v_proj_weight_palettized, x = var_654)[name = string("value_states_13")];
            tensor<int32, [4]> var_674 = const()[name = string("op_674"), val = tensor<int32, [4]>([1, 24, 128, 64])];
            tensor<fp16, [1, 24, 128, 64]> var_675 = reshape(shape = var_674, x = query_states_9)[name = string("op_675")];
            tensor<int32, [4]> var_676 = const()[name = string("op_676"), val = tensor<int32, [4]>([0, 1, 3, 2])];
            tensor<int32, [4]> var_678 = const()[name = string("op_678"), val = tensor<int32, [4]>([1, 8, 128, 64])];
            tensor<fp16, [1, 8, 128, 64]> var_679 = reshape(shape = var_678, x = key_states_13)[name = string("op_679")];
            tensor<int32, [4]> var_680 = const()[name = string("op_680"), val = tensor<int32, [4]>([0, 1, 3, 2])];
            tensor<int32, [4]> var_682 = const()[name = string("op_682"), val = tensor<int32, [4]>([1, 8, 128, 64])];
            tensor<fp16, [1, 8, 128, 64]> var_683 = reshape(shape = var_682, x = value_states_13)[name = string("op_683")];
            tensor<int32, [4]> var_684 = const()[name = string("op_684"), val = tensor<int32, [4]>([0, 1, 3, 2])];
            tensor<int32, [4]> x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor<int32, [4]>([1, 24, 64, 64])];
            tensor<bool, [4]> x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 24, 64, 128]> x_57 = transpose(perm = var_676, x = var_675)[name = string("transpose_33")];
            tensor<fp16, [1, 24, 64, 64]> x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = x_57)[name = string("x1_9")];
            tensor<int32, [4]> x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor<int32, [4]>([1, 24, 64, 128])];
            tensor<bool, [4]> x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 24, 64, 64]> x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = x_57)[name = string("x2_9")];
            tensor<fp16, [1, 24, 64, 64]> var_702 = mul(x = x1_9, y = cos_7)[name = string("op_702")];
            tensor<fp16, [1, 24, 64, 64]> var_703 = mul(x = x2_9, y = sin_7)[name = string("op_703")];
            tensor<fp16, [1, 24, 64, 64]> var_704 = sub(x = var_702, y = var_703)[name = string("op_704")];
            tensor<fp16, [1, 24, 64, 64]> var_705 = mul(x = x2_9, y = cos_7)[name = string("op_705")];
            tensor<fp16, [1, 24, 64, 64]> var_706 = mul(x = x1_9, y = sin_7)[name = string("op_706")];
            tensor<fp16, [1, 24, 64, 64]> var_707 = add(x = var_705, y = var_706)[name = string("op_707")];
            bool rotated_9_interleave_0 = const()[name = string("rotated_9_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 24, 64, 128]> rotated_9 = concat(axis = var_46, interleave = rotated_9_interleave_0, values = (var_704, var_707))[name = string("rotated_9")];
            tensor<int32, [4]> x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor<int32, [4]>([1, 8, 64, 64])];
            tensor<bool, [4]> x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 8, 64, 128]> x_61 = transpose(perm = var_680, x = var_679)[name = string("transpose_32")];
            tensor<fp16, [1, 8, 64, 64]> x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = x_61)[name = string("x1_11")];
            tensor<int32, [4]> x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor<int32, [4]>([1, 8, 64, 128])];
            tensor<bool, [4]> x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 8, 64, 64]> x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = x_61)[name = string("x2_11")];
            tensor<fp16, [1, 8, 64, 64]> var_723 = mul(x = x1_11, y = cos_7)[name = string("op_723")];
            tensor<fp16, [1, 8, 64, 64]> var_724 = mul(x = x2_11, y = sin_7)[name = string("op_724")];
            tensor<fp16, [1, 8, 64, 64]> var_725 = sub(x = var_723, y = var_724)[name = string("op_725")];
            tensor<fp16, [1, 8, 64, 64]> var_726 = mul(x = x2_11, y = cos_7)[name = string("op_726")];
            tensor<fp16, [1, 8, 64, 64]> var_727 = mul(x = x1_11, y = sin_7)[name = string("op_727")];
            tensor<fp16, [1, 8, 64, 64]> var_728 = add(x = var_726, y = var_727)[name = string("op_728")];
            bool rotated_11_interleave_0 = const()[name = string("rotated_11_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 8, 64, 128]> rotated_11 = concat(axis = var_46, interleave = rotated_11_interleave_0, values = (var_725, var_728))[name = string("rotated_11")];
            tensor<int32, [1]> expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor<int32, [1]>([9])];
            tensor<int32, [1]> expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_28 = const()[name = string("expand_dims_28"), val = tensor<int32, [1]>([10])];
            int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)];
            bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (expand_dims_24, expand_dims_25, current_pos, expand_dims_27))[name = string("concat_38")];
            tensor<int32, [1]> concat_39_values1_0 = const()[name = string("concat_39_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_39_values3_0 = const()[name = string("concat_39_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_39_axis_0 = const()[name = string("concat_39_axis_0"), val = int32(0)];
            bool concat_39_interleave_0 = const()[name = string("concat_39_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_39 = concat(axis = concat_39_axis_0, interleave = concat_39_interleave_0, values = (expand_dims_28, concat_39_values1_0, var_343, concat_39_values3_0))[name = string("concat_39")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_5_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_38, begin_mask = model_model_kv_cache_0_internal_tensor_assign_5_begin_mask_0, end = concat_39, end_mask = model_model_kv_cache_0_internal_tensor_assign_5_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_5_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_5_stride_0, update = rotated_11, x = coreml_update_state_17)[name = string("model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_5_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_18_write_state")];
            tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_18 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_18")];
            tensor<int32, [1]> expand_dims_30 = const()[name = string("expand_dims_30"), val = tensor<int32, [1]>([37])];
            tensor<int32, [1]> expand_dims_31 = const()[name = string("expand_dims_31"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_33 = const()[name = string("expand_dims_33"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_34 = const()[name = string("expand_dims_34"), val = tensor<int32, [1]>([38])];
            int32 concat_42_axis_0 = const()[name = string("concat_42_axis_0"), val = int32(0)];
            bool concat_42_interleave_0 = const()[name = string("concat_42_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_42 = concat(axis = concat_42_axis_0, interleave = concat_42_interleave_0, values = (expand_dims_30, expand_dims_31, current_pos, expand_dims_33))[name = string("concat_42")];
            tensor<int32, [1]> concat_43_values1_0 = const()[name = string("concat_43_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_43_values3_0 = const()[name = string("concat_43_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)];
            bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (expand_dims_34, concat_43_values1_0, var_343, concat_43_values3_0))[name = string("concat_43")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_6_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [1, 8, 64, 128]> value_states_15 = transpose(perm = var_684, x = var_683)[name = string("transpose_31")];
            tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_42, begin_mask = model_model_kv_cache_0_internal_tensor_assign_6_begin_mask_0, end = concat_43, end_mask = model_model_kv_cache_0_internal_tensor_assign_6_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_6_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_6_stride_0, update = value_states_15, x = coreml_update_state_18)[name = string("model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_6_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_19_write_state")];
            tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_19 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_19")];
            tensor<int32, [4]> var_751_begin_0 = const()[name = string("op_751_begin_0"), val = tensor<int32, [4]>([9, 0, 0, 0])];
            tensor<int32, [4]> var_751_end_0 = const()[name = string("op_751_end_0"), val = tensor<int32, [4]>([10, 8, 1024, 128])];
            tensor<bool, [4]> var_751_end_mask_0 = const()[name = string("op_751_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_751_cast_fp16 = slice_by_index(begin = var_751_begin_0, end = var_751_end_0, end_mask = var_751_end_mask_0, x = coreml_update_state_19)[name = string("op_751_cast_fp16")];
            tensor<int32, [1]> K_layer_cache_5_axes_0 = const()[name = string("K_layer_cache_5_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> K_layer_cache_5_cast_fp16 = squeeze(axes = K_layer_cache_5_axes_0, x = var_751_cast_fp16)[name = string("K_layer_cache_5_cast_fp16")];
            tensor<int32, [4]> var_753_begin_0 = const()[name = string("op_753_begin_0"), val = tensor<int32, [4]>([37, 0, 0, 0])];
            tensor<int32, [4]> var_753_end_0 = const()[name = string("op_753_end_0"), val = tensor<int32, [4]>([38, 8, 1024, 128])];
            tensor<bool, [4]> var_753_end_mask_0 = const()[name = string("op_753_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_753_cast_fp16 = slice_by_index(begin = var_753_begin_0, end = var_753_end_0, end_mask = var_753_end_mask_0, x = coreml_update_state_19)[name = string("op_753_cast_fp16")];
            tensor<int32, [1]> V_layer_cache_5_axes_0 = const()[name = string("V_layer_cache_5_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> V_layer_cache_5_cast_fp16 = squeeze(axes = V_layer_cache_5_axes_0, x = var_753_cast_fp16)[name = string("V_layer_cache_5_cast_fp16")];
            tensor<int32, [1]> x_67_axes_0 = const()[name = string("x_67_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_67_cast_fp16 = expand_dims(axes = x_67_axes_0, x = K_layer_cache_5_cast_fp16)[name = string("x_67_cast_fp16")];
            tensor<int32, [4]> var_762 = const()[name = string("op_762"), val = tensor<int32, [4]>([1, 3, 1, 1])];
            tensor<fp16, [8, 3, 1024, 128]> x_69_cast_fp16 = tile(reps = var_762, x = x_67_cast_fp16)[name = string("x_69_cast_fp16")];
            tensor<int32, [4]> var_766 = const()[name = string("op_766"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
            tensor<fp16, [1, 24, 1024, 128]> var_767_cast_fp16 = reshape(shape = var_766, x = x_69_cast_fp16)[name = string("op_767_cast_fp16")];
            tensor<int32, [1]> x_73_axes_0 = const()[name = string("x_73_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = V_layer_cache_5_cast_fp16)[name = string("x_73_cast_fp16")];
            tensor<int32, [4]> var_769 = const()[name = string("op_769"), val = tensor<int32, [4]>([1, 3, 1, 1])];
            tensor<fp16, [8, 3, 1024, 128]> x_75_cast_fp16 = tile(reps = var_769, x = x_73_cast_fp16)[name = string("x_75_cast_fp16")];
            bool var_776_transpose_x_0 = const()[name = string("op_776_transpose_x_0"), val = bool(false)];
            bool var_776_transpose_y_0 = const()[name = string("op_776_transpose_y_0"), val = bool(true)];
            tensor<fp16, [1, 24, 64, 1024]> var_776_cast_fp16 = matmul(transpose_x = var_776_transpose_x_0, transpose_y = var_776_transpose_y_0, x = rotated_9, y = var_767_cast_fp16)[name = string("op_776_cast_fp16")];
            fp16 var_777_to_fp16 = const()[name = string("op_777_to_fp16"), val = fp16(0x1.6ap-4)];
            tensor<fp16, [1, 24, 64, 1024]> attn_weights_5_cast_fp16 = mul(x = var_776_cast_fp16, y = var_777_to_fp16)[name = string("attn_weights_5_cast_fp16")];
            tensor<fp16, [1, 24, 64, 1024]> x_77_cast_fp16 = add(x = attn_weights_5_cast_fp16, y = causal_mask)[name = string("x_77_cast_fp16")];
            tensor<int32, [1]> reduce_max_2_axes_0 = const()[name = string("reduce_max_2_axes_0"), val = tensor<int32, [1]>([-1])];
            bool reduce_max_2_keep_dims_0 = const()[name = string("reduce_max_2_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 24, 64, 1]> reduce_max_2_cast_fp16 = reduce_max(axes = reduce_max_2_axes_0, keep_dims = reduce_max_2_keep_dims_0, x = x_77_cast_fp16)[name = string("reduce_max_2_cast_fp16")];
            tensor<fp16, [1, 24, 64, 1024]> x_79_cast_fp16 = sub(x = x_77_cast_fp16, y = reduce_max_2_cast_fp16)[name = string("x_79_cast_fp16")];
            tensor<fp16, [1, 24, 64, 1024]> exp_x_5_cast_fp16 = exp(x = x_79_cast_fp16)[name = string("exp_x_5_cast_fp16")];
            tensor<int32, [1]> var_788_axes_0 = const()[name = string("op_788_axes_0"), val = tensor<int32, [1]>([-1])];
            bool var_788_keep_dims_0 = const()[name = string("op_788_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 24, 64, 1]> var_788_cast_fp16 = reduce_sum(axes = var_788_axes_0, keep_dims = var_788_keep_dims_0, x = exp_x_5_cast_fp16)[name = string("op_788_cast_fp16")];
            tensor<fp16, [1, 24, 64, 1024]> var_789_cast_fp16 = real_div(x = exp_x_5_cast_fp16, y = var_788_cast_fp16)[name = string("op_789_cast_fp16")];
            tensor<int32, [3]> concat_48 = const()[name = string("concat_48"), val = tensor<int32, [3]>([24, 64, 1024])];
            tensor<fp16, [24, 64, 1024]> reshape_6_cast_fp16 = reshape(shape = concat_48, x = var_789_cast_fp16)[name = string("reshape_6_cast_fp16")];
            tensor<int32, [3]> concat_49 = const()[name = string("concat_49"), val = tensor<int32, [3]>([24, 1024, 128])];
            tensor<fp16, [24, 1024, 128]> reshape_7_cast_fp16 = reshape(shape = concat_49, x = x_75_cast_fp16)[name = string("reshape_7_cast_fp16")];
            bool matmul_2_transpose_x_0 = const()[name = string("matmul_2_transpose_x_0"), val = bool(false)];
            bool matmul_2_transpose_y_0 = const()[name = string("matmul_2_transpose_y_0"), val = bool(false)];
            tensor<fp16, [24, 64, 128]> matmul_2_cast_fp16 = matmul(transpose_x = matmul_2_transpose_x_0, transpose_y = matmul_2_transpose_y_0, x = reshape_6_cast_fp16, y = reshape_7_cast_fp16)[name = string("matmul_2_cast_fp16")];
            tensor<int32, [4]> concat_53 = const()[name = string("concat_53"), val = tensor<int32, [4]>([1, 24, 64, 128])];
            tensor<fp16, [1, 24, 64, 128]> reshape_8_cast_fp16 = reshape(shape = concat_53, x = matmul_2_cast_fp16)[name = string("reshape_8_cast_fp16")];
            tensor<int32, [4]> var_792_perm_0 = const()[name = string("op_792_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_794 = const()[name = string("op_794"), val = tensor<int32, [3]>([1, 64, 3072])];
            tensor<fp16, [1, 64, 24, 128]> var_792_cast_fp16 = transpose(perm = var_792_perm_0, x = reshape_8_cast_fp16)[name = string("transpose_30")];
            tensor<fp16, [1, 64, 3072]> input_33_cast_fp16 = reshape(shape = var_794, x = var_792_cast_fp16)[name = string("input_33_cast_fp16")];
            tensor<fp16, [3072, 3072]> model_model_layers_9_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(736012352))), lut = tensor<fp16, [384, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(745449600))))[name = string("model_model_layers_9_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
            tensor<fp16, [1, 64, 3072]> linear_2_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_9_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_33_cast_fp16)[name = string("linear_2_cast_fp16")];
            tensor<fp16, [1, 64, 3072]> hidden_states_21_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = linear_2_cast_fp16)[name = string("hidden_states_21_cast_fp16")];
            tensor<int32, [1]> mean_11_axes_0 = const()[name = string("mean_11_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_11_keep_dims_0 = const()[name = string("mean_11_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 64, 1]> mean_11_cast_fp16 = reduce_mean(axes = mean_11_axes_0, keep_dims = mean_11_keep_dims_0, x = hidden_states_21_cast_fp16)[name = string("mean_11_cast_fp16")];
            tensor<fp16, [1, 64, 3072]> input_35_cast_fp16 = sub(x = hidden_states_21_cast_fp16, y = mean_11_cast_fp16)[name = string("input_35_cast_fp16")];
            tensor<int32, [1]> var_805_axes_0 = const()[name = string("op_805_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [3072]> model_model_layers_9_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(745646272)))];
            tensor<fp16, [1, 64, 3072]> var_805_cast_fp16 = layer_norm(axes = var_805_axes_0, epsilon = var_48_to_fp16, gamma = model_model_layers_9_post_attention_layernorm_weight_to_fp16, x = input_35_cast_fp16)[name = string("op_805_cast_fp16")];
            tensor<int32, [3]> var_812 = const()[name = string("op_812"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> input_37_axes_0 = const()[name = string("input_37_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 64]> var_813 = transpose(perm = var_812, x = var_805_cast_fp16)[name = string("transpose_29")];
            tensor<fp16, [1, 3072, 1, 64]> input_37 = expand_dims(axes = input_37_axes_0, x = var_813)[name = string("input_37")];
            string input_39_pad_type_0 = const()[name = string("input_39_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> input_39_strides_0 = const()[name = string("input_39_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> input_39_pad_0 = const()[name = string("input_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> input_39_dilations_0 = const()[name = string("input_39_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 input_39_groups_0 = const()[name = string("input_39_groups_0"), val = int32(1)];
            tensor<fp16, [1, 8192, 1, 64]> input_39 = conv(dilations = input_39_dilations_0, groups = input_39_groups_0, pad = input_39_pad_0, pad_type = input_39_pad_type_0, strides = input_39_strides_0, weight = model_model_layers_9_mlp_gate_proj_weight_palettized, x = input_37)[name = string("input_39")];
            string up_states_5_pad_type_0 = const()[name = string("up_states_5_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> up_states_5_strides_0 = const()[name = string("up_states_5_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> up_states_5_pad_0 = const()[name = string("up_states_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> up_states_5_dilations_0 = const()[name = string("up_states_5_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 up_states_5_groups_0 = const()[name = string("up_states_5_groups_0"), val = int32(1)];
            tensor<fp16, [1, 8192, 1, 64]> up_states_5 = conv(dilations = up_states_5_dilations_0, groups = up_states_5_groups_0, pad = up_states_5_pad_0, pad_type = up_states_5_pad_type_0, strides = up_states_5_strides_0, weight = model_model_layers_9_mlp_up_proj_weight_palettized, x = input_37)[name = string("up_states_5")];
            tensor<fp16, [1, 8192, 1, 64]> gate_states_5 = silu(x = input_39)[name = string("gate_states_5")];
            tensor<fp16, [1, 8192, 1, 64]> input_41 = mul(x = gate_states_5, y = up_states_5)[name = string("input_41")];
            string hidden_states_23_pad_type_0 = const()[name = string("hidden_states_23_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> hidden_states_23_strides_0 = const()[name = string("hidden_states_23_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> hidden_states_23_pad_0 = const()[name = string("hidden_states_23_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> hidden_states_23_dilations_0 = const()[name = string("hidden_states_23_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 hidden_states_23_groups_0 = const()[name = string("hidden_states_23_groups_0"), val = int32(1)];
            tensor<fp16, [1, 3072, 1, 64]> hidden_states_23 = conv(dilations = hidden_states_23_dilations_0, groups = hidden_states_23_groups_0, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = hidden_states_23_strides_0, weight = model_model_layers_9_mlp_down_proj_weight_palettized, x = input_41)[name = string("hidden_states_23")];
            tensor<int32, [1]> var_835_axes_0 = const()[name = string("op_835_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 64]> var_835 = squeeze(axes = var_835_axes_0, x = hidden_states_23)[name = string("op_835")];
            tensor<int32, [3]> var_836 = const()[name = string("op_836"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 64, 3072]> var_837 = transpose(perm = var_836, x = var_835)[name = string("transpose_28")];
            tensor<fp16, [1, 64, 3072]> hidden_states_25_cast_fp16 = add(x = hidden_states_21_cast_fp16, y = var_837)[name = string("hidden_states_25_cast_fp16")];
            tensor<int32, [1]> mean_13_axes_0 = const()[name = string("mean_13_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_13_keep_dims_0 = const()[name = string("mean_13_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 64, 1]> mean_13_cast_fp16 = reduce_mean(axes = mean_13_axes_0, keep_dims = mean_13_keep_dims_0, x = hidden_states_25_cast_fp16)[name = string("mean_13_cast_fp16")];
            tensor<fp16, [1, 64, 3072]> input_43_cast_fp16 = sub(x = hidden_states_25_cast_fp16, y = mean_13_cast_fp16)[name = string("input_43_cast_fp16")];
            tensor<int32, [1]> var_845_axes_0 = const()[name = string("op_845_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [3072]> model_model_layers_10_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_input_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(745652480)))];
            tensor<fp16, [1, 64, 3072]> var_845_cast_fp16 = layer_norm(axes = var_845_axes_0, epsilon = var_48_to_fp16, gamma = model_model_layers_10_input_layernorm_weight_to_fp16, x = input_43_cast_fp16)[name = string("op_845_cast_fp16")];
            tensor<int32, [3]> var_849 = const()[name = string("op_849"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> var_851_axes_0 = const()[name = string("op_851_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 64]> var_850 = transpose(perm = var_849, x = var_845_cast_fp16)[name = string("transpose_27")];
            tensor<fp16, [1, 3072, 1, 64]> var_851 = expand_dims(axes = var_851_axes_0, x = var_850)[name = string("op_851")];
            string query_states_13_pad_type_0 = const()[name = string("query_states_13_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> query_states_13_strides_0 = const()[name = string("query_states_13_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> query_states_13_pad_0 = const()[name = string("query_states_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> query_states_13_dilations_0 = const()[name = string("query_states_13_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 query_states_13_groups_0 = const()[name = string("query_states_13_groups_0"), val = int32(1)];
            tensor<fp16, [1, 3072, 1, 64]> query_states_13 = conv(dilations = query_states_13_dilations_0, groups = query_states_13_groups_0, pad = query_states_13_pad_0, pad_type = query_states_13_pad_type_0, strides = query_states_13_strides_0, weight = model_model_layers_10_self_attn_q_proj_weight_palettized, x = var_851)[name = string("query_states_13")];
            string key_states_19_pad_type_0 = const()[name = string("key_states_19_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> key_states_19_strides_0 = const()[name = string("key_states_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> key_states_19_pad_0 = const()[name = string("key_states_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> key_states_19_dilations_0 = const()[name = string("key_states_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 key_states_19_groups_0 = const()[name = string("key_states_19_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 64]> key_states_19 = conv(dilations = key_states_19_dilations_0, groups = key_states_19_groups_0, pad = key_states_19_pad_0, pad_type = key_states_19_pad_type_0, strides = key_states_19_strides_0, weight = model_model_layers_10_self_attn_k_proj_weight_palettized, x = var_851)[name = string("key_states_19")];
            string value_states_19_pad_type_0 = const()[name = string("value_states_19_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> value_states_19_strides_0 = const()[name = string("value_states_19_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> value_states_19_pad_0 = const()[name = string("value_states_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> value_states_19_dilations_0 = const()[name = string("value_states_19_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 value_states_19_groups_0 = const()[name = string("value_states_19_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 64]> value_states_19 = conv(dilations = value_states_19_dilations_0, groups = value_states_19_groups_0, pad = value_states_19_pad_0, pad_type = value_states_19_pad_type_0, strides = value_states_19_strides_0, weight = model_model_layers_10_self_attn_v_proj_weight_palettized, x = var_851)[name = string("value_states_19")];
            tensor<int32, [4]> var_871 = const()[name = string("op_871"), val = tensor<int32, [4]>([1, 24, 128, 64])];
            tensor<fp16, [1, 24, 128, 64]> var_872 = reshape(shape = var_871, x = query_states_13)[name = string("op_872")];
            tensor<int32, [4]> var_873 = const()[name = string("op_873"), val = tensor<int32, [4]>([0, 1, 3, 2])];
            tensor<int32, [4]> var_875 = const()[name = string("op_875"), val = tensor<int32, [4]>([1, 8, 128, 64])];
            tensor<fp16, [1, 8, 128, 64]> var_876 = reshape(shape = var_875, x = key_states_19)[name = string("op_876")];
            tensor<int32, [4]> var_877 = const()[name = string("op_877"), val = tensor<int32, [4]>([0, 1, 3, 2])];
            tensor<int32, [4]> var_879 = const()[name = string("op_879"), val = tensor<int32, [4]>([1, 8, 128, 64])];
            tensor<fp16, [1, 8, 128, 64]> var_880 = reshape(shape = var_879, x = value_states_19)[name = string("op_880")];
            tensor<int32, [4]> var_881 = const()[name = string("op_881"), val = tensor<int32, [4]>([0, 1, 3, 2])];
            tensor<int32, [4]> x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor<int32, [4]>([1, 24, 64, 64])];
            tensor<bool, [4]> x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 24, 64, 128]> x_85 = transpose(perm = var_873, x = var_872)[name = string("transpose_26")];
            tensor<fp16, [1, 24, 64, 64]> x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = x_85)[name = string("x1_13")];
            tensor<int32, [4]> x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor<int32, [4]>([1, 24, 64, 128])];
            tensor<bool, [4]> x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 24, 64, 64]> x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = x_85)[name = string("x2_13")];
            tensor<fp16, [1, 24, 64, 64]> var_899 = mul(x = x1_13, y = cos_7)[name = string("op_899")];
            tensor<fp16, [1, 24, 64, 64]> var_900 = mul(x = x2_13, y = sin_7)[name = string("op_900")];
            tensor<fp16, [1, 24, 64, 64]> var_901 = sub(x = var_899, y = var_900)[name = string("op_901")];
            tensor<fp16, [1, 24, 64, 64]> var_902 = mul(x = x2_13, y = cos_7)[name = string("op_902")];
            tensor<fp16, [1, 24, 64, 64]> var_903 = mul(x = x1_13, y = sin_7)[name = string("op_903")];
            tensor<fp16, [1, 24, 64, 64]> var_904 = add(x = var_902, y = var_903)[name = string("op_904")];
            bool rotated_13_interleave_0 = const()[name = string("rotated_13_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 24, 64, 128]> rotated_13 = concat(axis = var_46, interleave = rotated_13_interleave_0, values = (var_901, var_904))[name = string("rotated_13")];
            tensor<int32, [4]> x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor<int32, [4]>([1, 8, 64, 64])];
            tensor<bool, [4]> x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 8, 64, 128]> x_89 = transpose(perm = var_877, x = var_876)[name = string("transpose_25")];
            tensor<fp16, [1, 8, 64, 64]> x1_15 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = x_89)[name = string("x1_15")];
            tensor<int32, [4]> x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor<int32, [4]>([1, 8, 64, 128])];
            tensor<bool, [4]> x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 8, 64, 64]> x2_15 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = x_89)[name = string("x2_15")];
            tensor<fp16, [1, 8, 64, 64]> var_920 = mul(x = x1_15, y = cos_7)[name = string("op_920")];
            tensor<fp16, [1, 8, 64, 64]> var_921 = mul(x = x2_15, y = sin_7)[name = string("op_921")];
            tensor<fp16, [1, 8, 64, 64]> var_922 = sub(x = var_920, y = var_921)[name = string("op_922")];
            tensor<fp16, [1, 8, 64, 64]> var_923 = mul(x = x2_15, y = cos_7)[name = string("op_923")];
            tensor<fp16, [1, 8, 64, 64]> var_924 = mul(x = x1_15, y = sin_7)[name = string("op_924")];
            tensor<fp16, [1, 8, 64, 64]> var_925 = add(x = var_923, y = var_924)[name = string("op_925")];
            bool rotated_15_interleave_0 = const()[name = string("rotated_15_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 8, 64, 128]> rotated_15 = concat(axis = var_46, interleave = rotated_15_interleave_0, values = (var_922, var_925))[name = string("rotated_15")];
            tensor<int32, [1]> expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor<int32, [1]>([10])];
            tensor<int32, [1]> expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_40 = const()[name = string("expand_dims_40"), val = tensor<int32, [1]>([11])];
            int32 concat_56_axis_0 = const()[name = string("concat_56_axis_0"), val = int32(0)];
            bool concat_56_interleave_0 = const()[name = string("concat_56_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_56 = concat(axis = concat_56_axis_0, interleave = concat_56_interleave_0, values = (expand_dims_36, expand_dims_37, current_pos, expand_dims_39))[name = string("concat_56")];
            tensor<int32, [1]> concat_57_values1_0 = const()[name = string("concat_57_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_57_values3_0 = const()[name = string("concat_57_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_57_axis_0 = const()[name = string("concat_57_axis_0"), val = int32(0)];
            bool concat_57_interleave_0 = const()[name = string("concat_57_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_57 = concat(axis = concat_57_axis_0, interleave = concat_57_interleave_0, values = (expand_dims_40, concat_57_values1_0, var_343, concat_57_values3_0))[name = string("concat_57")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_7_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_56, begin_mask = model_model_kv_cache_0_internal_tensor_assign_7_begin_mask_0, end = concat_57, end_mask = model_model_kv_cache_0_internal_tensor_assign_7_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_7_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_7_stride_0, update = rotated_15, x = coreml_update_state_19)[name = string("model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_7_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_20_write_state")];
            tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_20 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_20")];
            tensor<int32, [1]> expand_dims_42 = const()[name = string("expand_dims_42"), val = tensor<int32, [1]>([38])];
            tensor<int32, [1]> expand_dims_43 = const()[name = string("expand_dims_43"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_45 = const()[name = string("expand_dims_45"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_46 = const()[name = string("expand_dims_46"), val = tensor<int32, [1]>([39])];
            int32 concat_60_axis_0 = const()[name = string("concat_60_axis_0"), val = int32(0)];
            bool concat_60_interleave_0 = const()[name = string("concat_60_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_60 = concat(axis = concat_60_axis_0, interleave = concat_60_interleave_0, values = (expand_dims_42, expand_dims_43, current_pos, expand_dims_45))[name = string("concat_60")];
            tensor<int32, [1]> concat_61_values1_0 = const()[name = string("concat_61_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_61_values3_0 = const()[name = string("concat_61_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_61_axis_0 = const()[name = string("concat_61_axis_0"), val = int32(0)];
            bool concat_61_interleave_0 = const()[name = string("concat_61_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_61 = concat(axis = concat_61_axis_0, interleave = concat_61_interleave_0, values = (expand_dims_46, concat_61_values1_0, var_343, concat_61_values3_0))[name = string("concat_61")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_8_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [1, 8, 64, 128]> value_states_21 = transpose(perm = var_881, x = var_880)[name = string("transpose_24")];
            tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_60, begin_mask = model_model_kv_cache_0_internal_tensor_assign_8_begin_mask_0, end = concat_61, end_mask = model_model_kv_cache_0_internal_tensor_assign_8_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_8_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_8_stride_0, update = value_states_21, x = coreml_update_state_20)[name = string("model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_8_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_21_write_state")];
            tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_21 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_21")];
            tensor<int32, [4]> var_948_begin_0 = const()[name = string("op_948_begin_0"), val = tensor<int32, [4]>([10, 0, 0, 0])];
            tensor<int32, [4]> var_948_end_0 = const()[name = string("op_948_end_0"), val = tensor<int32, [4]>([11, 8, 1024, 128])];
            tensor<bool, [4]> var_948_end_mask_0 = const()[name = string("op_948_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_948_cast_fp16 = slice_by_index(begin = var_948_begin_0, end = var_948_end_0, end_mask = var_948_end_mask_0, x = coreml_update_state_21)[name = string("op_948_cast_fp16")];
            tensor<int32, [1]> K_layer_cache_7_axes_0 = const()[name = string("K_layer_cache_7_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> K_layer_cache_7_cast_fp16 = squeeze(axes = K_layer_cache_7_axes_0, x = var_948_cast_fp16)[name = string("K_layer_cache_7_cast_fp16")];
            tensor<int32, [4]> var_950_begin_0 = const()[name = string("op_950_begin_0"), val = tensor<int32, [4]>([38, 0, 0, 0])];
            tensor<int32, [4]> var_950_end_0 = const()[name = string("op_950_end_0"), val = tensor<int32, [4]>([39, 8, 1024, 128])];
            tensor<bool, [4]> var_950_end_mask_0 = const()[name = string("op_950_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_950_cast_fp16 = slice_by_index(begin = var_950_begin_0, end = var_950_end_0, end_mask = var_950_end_mask_0, x = coreml_update_state_21)[name = string("op_950_cast_fp16")];
            tensor<int32, [1]> V_layer_cache_7_axes_0 = const()[name = string("V_layer_cache_7_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> V_layer_cache_7_cast_fp16 = squeeze(axes = V_layer_cache_7_axes_0, x = var_950_cast_fp16)[name = string("V_layer_cache_7_cast_fp16")];
            tensor<int32, [1]> x_95_axes_0 = const()[name = string("x_95_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_95_cast_fp16 = expand_dims(axes = x_95_axes_0, x = K_layer_cache_7_cast_fp16)[name = string("x_95_cast_fp16")];
            tensor<int32, [4]> var_959 = const()[name = string("op_959"), val = tensor<int32, [4]>([1, 3, 1, 1])];
            tensor<fp16, [8, 3, 1024, 128]> x_97_cast_fp16 = tile(reps = var_959, x = x_95_cast_fp16)[name = string("x_97_cast_fp16")];
            tensor<int32, [4]> var_963 = const()[name = string("op_963"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
            tensor<fp16, [1, 24, 1024, 128]> var_964_cast_fp16 = reshape(shape = var_963, x = x_97_cast_fp16)[name = string("op_964_cast_fp16")];
            tensor<int32, [1]> x_101_axes_0 = const()[name = string("x_101_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_101_cast_fp16 = expand_dims(axes = x_101_axes_0, x = V_layer_cache_7_cast_fp16)[name = string("x_101_cast_fp16")];
            tensor<int32, [4]> var_966 = const()[name = string("op_966"), val = tensor<int32, [4]>([1, 3, 1, 1])];
            tensor<fp16, [8, 3, 1024, 128]> x_103_cast_fp16 = tile(reps = var_966, x = x_101_cast_fp16)[name = string("x_103_cast_fp16")];
            bool var_973_transpose_x_0 = const()[name = string("op_973_transpose_x_0"), val = bool(false)];
            bool var_973_transpose_y_0 = const()[name = string("op_973_transpose_y_0"), val = bool(true)];
            tensor<fp16, [1, 24, 64, 1024]> var_973_cast_fp16 = matmul(transpose_x = var_973_transpose_x_0, transpose_y = var_973_transpose_y_0, x = rotated_13, y = var_964_cast_fp16)[name = string("op_973_cast_fp16")];
            fp16 var_974_to_fp16 = const()[name = string("op_974_to_fp16"), val = fp16(0x1.6ap-4)];
            tensor<fp16, [1, 24, 64, 1024]> attn_weights_7_cast_fp16 = mul(x = var_973_cast_fp16, y = var_974_to_fp16)[name = string("attn_weights_7_cast_fp16")];
            tensor<fp16, [1, 24, 64, 1024]> x_105_cast_fp16 = add(x = attn_weights_7_cast_fp16, y = causal_mask)[name = string("x_105_cast_fp16")];
            tensor<int32, [1]> reduce_max_3_axes_0 = const()[name = string("reduce_max_3_axes_0"), val = tensor<int32, [1]>([-1])];
            bool reduce_max_3_keep_dims_0 = const()[name = string("reduce_max_3_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 24, 64, 1]> reduce_max_3_cast_fp16 = reduce_max(axes = reduce_max_3_axes_0, keep_dims = reduce_max_3_keep_dims_0, x = x_105_cast_fp16)[name = string("reduce_max_3_cast_fp16")];
            tensor<fp16, [1, 24, 64, 1024]> x_107_cast_fp16 = sub(x = x_105_cast_fp16, y = reduce_max_3_cast_fp16)[name = string("x_107_cast_fp16")];
            tensor<fp16, [1, 24, 64, 1024]> exp_x_7_cast_fp16 = exp(x = x_107_cast_fp16)[name = string("exp_x_7_cast_fp16")];
            tensor<int32, [1]> var_985_axes_0 = const()[name = string("op_985_axes_0"), val = tensor<int32, [1]>([-1])];
            bool var_985_keep_dims_0 = const()[name = string("op_985_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 24, 64, 1]> var_985_cast_fp16 = reduce_sum(axes = var_985_axes_0, keep_dims = var_985_keep_dims_0, x = exp_x_7_cast_fp16)[name = string("op_985_cast_fp16")];
            tensor<fp16, [1, 24, 64, 1024]> var_986_cast_fp16 = real_div(x = exp_x_7_cast_fp16, y = var_985_cast_fp16)[name = string("op_986_cast_fp16")];
            tensor<int32, [3]> concat_66 = const()[name = string("concat_66"), val = tensor<int32, [3]>([24, 64, 1024])];
            tensor<fp16, [24, 64, 1024]> reshape_9_cast_fp16 = reshape(shape = concat_66, x = var_986_cast_fp16)[name = string("reshape_9_cast_fp16")];
            tensor<int32, [3]> concat_67 = const()[name = string("concat_67"), val = tensor<int32, [3]>([24, 1024, 128])];
            tensor<fp16, [24, 1024, 128]> reshape_10_cast_fp16 = reshape(shape = concat_67, x = x_103_cast_fp16)[name = string("reshape_10_cast_fp16")];
            bool matmul_3_transpose_x_0 = const()[name = string("matmul_3_transpose_x_0"), val = bool(false)];
            bool matmul_3_transpose_y_0 = const()[name = string("matmul_3_transpose_y_0"), val = bool(false)];
            tensor<fp16, [24, 64, 128]> matmul_3_cast_fp16 = matmul(transpose_x = matmul_3_transpose_x_0, transpose_y = matmul_3_transpose_y_0, x = reshape_9_cast_fp16, y = reshape_10_cast_fp16)[name = string("matmul_3_cast_fp16")];
            tensor<int32, [4]> concat_71 = const()[name = string("concat_71"), val = tensor<int32, [4]>([1, 24, 64, 128])];
            tensor<fp16, [1, 24, 64, 128]> reshape_11_cast_fp16 = reshape(shape = concat_71, x = matmul_3_cast_fp16)[name = string("reshape_11_cast_fp16")];
            tensor<int32, [4]> var_989_perm_0 = const()[name = string("op_989_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_991 = const()[name = string("op_991"), val = tensor<int32, [3]>([1, 64, 3072])];
            tensor<fp16, [1, 64, 24, 128]> var_989_cast_fp16 = transpose(perm = var_989_perm_0, x = reshape_11_cast_fp16)[name = string("transpose_23")];
            tensor<fp16, [1, 64, 3072]> input_47_cast_fp16 = reshape(shape = var_991, x = var_989_cast_fp16)[name = string("input_47_cast_fp16")];
            tensor<fp16, [3072, 3072]> model_model_layers_10_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(745658688))), lut = tensor<fp16, [384, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755095936))))[name = string("model_model_layers_10_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
            tensor<fp16, [1, 64, 3072]> linear_3_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_10_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_47_cast_fp16)[name = string("linear_3_cast_fp16")];
            tensor<fp16, [1, 64, 3072]> hidden_states_29_cast_fp16 = add(x = hidden_states_25_cast_fp16, y = linear_3_cast_fp16)[name = string("hidden_states_29_cast_fp16")];
            tensor<int32, [1]> mean_15_axes_0 = const()[name = string("mean_15_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_15_keep_dims_0 = const()[name = string("mean_15_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 64, 1]> mean_15_cast_fp16 = reduce_mean(axes = mean_15_axes_0, keep_dims = mean_15_keep_dims_0, x = hidden_states_29_cast_fp16)[name = string("mean_15_cast_fp16")];
            tensor<fp16, [1, 64, 3072]> input_49_cast_fp16 = sub(x = hidden_states_29_cast_fp16, y = mean_15_cast_fp16)[name = string("input_49_cast_fp16")];
            tensor<int32, [1]> var_1002_axes_0 = const()[name = string("op_1002_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [3072]> model_model_layers_10_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755292608)))];
            tensor<fp16, [1, 64, 3072]> var_1002_cast_fp16 = layer_norm(axes = var_1002_axes_0, epsilon = var_48_to_fp16, gamma = model_model_layers_10_post_attention_layernorm_weight_to_fp16, x = input_49_cast_fp16)[name = string("op_1002_cast_fp16")];
            tensor<int32, [3]> var_1009 = const()[name = string("op_1009"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> input_51_axes_0 = const()[name = string("input_51_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 64]> var_1010 = transpose(perm = var_1009, x = var_1002_cast_fp16)[name = string("transpose_22")];
            tensor<fp16, [1, 3072, 1, 64]> input_51 = expand_dims(axes = input_51_axes_0, x = var_1010)[name = string("input_51")];
            string input_53_pad_type_0 = const()[name = string("input_53_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> input_53_strides_0 = const()[name = string("input_53_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> input_53_pad_0 = const()[name = string("input_53_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> input_53_dilations_0 = const()[name = string("input_53_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 input_53_groups_0 = const()[name = string("input_53_groups_0"), val = int32(1)];
            tensor<fp16, [1, 8192, 1, 64]> input_53 = conv(dilations = input_53_dilations_0, groups = input_53_groups_0, pad = input_53_pad_0, pad_type = input_53_pad_type_0, strides = input_53_strides_0, weight = model_model_layers_10_mlp_gate_proj_weight_palettized, x = input_51)[name = string("input_53")];
            string up_states_7_pad_type_0 = const()[name = string("up_states_7_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> up_states_7_strides_0 = const()[name = string("up_states_7_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> up_states_7_pad_0 = const()[name = string("up_states_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> up_states_7_dilations_0 = const()[name = string("up_states_7_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 up_states_7_groups_0 = const()[name = string("up_states_7_groups_0"), val = int32(1)];
            tensor<fp16, [1, 8192, 1, 64]> up_states_7 = conv(dilations = up_states_7_dilations_0, groups = up_states_7_groups_0, pad = up_states_7_pad_0, pad_type = up_states_7_pad_type_0, strides = up_states_7_strides_0, weight = model_model_layers_10_mlp_up_proj_weight_palettized, x = input_51)[name = string("up_states_7")];
            tensor<fp16, [1, 8192, 1, 64]> gate_states_7 = silu(x = input_53)[name = string("gate_states_7")];
            tensor<fp16, [1, 8192, 1, 64]> input_55 = mul(x = gate_states_7, y = up_states_7)[name = string("input_55")];
            string hidden_states_31_pad_type_0 = const()[name = string("hidden_states_31_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> hidden_states_31_strides_0 = const()[name = string("hidden_states_31_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> hidden_states_31_pad_0 = const()[name = string("hidden_states_31_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> hidden_states_31_dilations_0 = const()[name = string("hidden_states_31_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 hidden_states_31_groups_0 = const()[name = string("hidden_states_31_groups_0"), val = int32(1)];
            tensor<fp16, [1, 3072, 1, 64]> hidden_states_31 = conv(dilations = hidden_states_31_dilations_0, groups = hidden_states_31_groups_0, pad = hidden_states_31_pad_0, pad_type = hidden_states_31_pad_type_0, strides = hidden_states_31_strides_0, weight = model_model_layers_10_mlp_down_proj_weight_palettized, x = input_55)[name = string("hidden_states_31")];
            tensor<int32, [1]> var_1032_axes_0 = const()[name = string("op_1032_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 64]> var_1032 = squeeze(axes = var_1032_axes_0, x = hidden_states_31)[name = string("op_1032")];
            tensor<int32, [3]> var_1033 = const()[name = string("op_1033"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 64, 3072]> var_1034 = transpose(perm = var_1033, x = var_1032)[name = string("transpose_21")];
            tensor<fp16, [1, 64, 3072]> hidden_states_33_cast_fp16 = add(x = hidden_states_29_cast_fp16, y = var_1034)[name = string("hidden_states_33_cast_fp16")];
            tensor<int32, [1]> mean_17_axes_0 = const()[name = string("mean_17_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_17_keep_dims_0 = const()[name = string("mean_17_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 64, 1]> mean_17_cast_fp16 = reduce_mean(axes = mean_17_axes_0, keep_dims = mean_17_keep_dims_0, x = hidden_states_33_cast_fp16)[name = string("mean_17_cast_fp16")];
            tensor<fp16, [1, 64, 3072]> input_57_cast_fp16 = sub(x = hidden_states_33_cast_fp16, y = mean_17_cast_fp16)[name = string("input_57_cast_fp16")];
            tensor<int32, [1]> var_1042_axes_0 = const()[name = string("op_1042_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [3072]> model_model_layers_11_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_input_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755298816)))];
            tensor<fp16, [1, 64, 3072]> var_1042_cast_fp16 = layer_norm(axes = var_1042_axes_0, epsilon = var_48_to_fp16, gamma = model_model_layers_11_input_layernorm_weight_to_fp16, x = input_57_cast_fp16)[name = string("op_1042_cast_fp16")];
            tensor<int32, [3]> var_1046 = const()[name = string("op_1046"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> var_1048_axes_0 = const()[name = string("op_1048_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 64]> var_1047 = transpose(perm = var_1046, x = var_1042_cast_fp16)[name = string("transpose_20")];
            tensor<fp16, [1, 3072, 1, 64]> var_1048 = expand_dims(axes = var_1048_axes_0, x = var_1047)[name = string("op_1048")];
            string query_states_17_pad_type_0 = const()[name = string("query_states_17_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> query_states_17_strides_0 = const()[name = string("query_states_17_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> query_states_17_pad_0 = const()[name = string("query_states_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> query_states_17_dilations_0 = const()[name = string("query_states_17_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 query_states_17_groups_0 = const()[name = string("query_states_17_groups_0"), val = int32(1)];
            tensor<fp16, [1, 3072, 1, 64]> query_states_17 = conv(dilations = query_states_17_dilations_0, groups = query_states_17_groups_0, pad = query_states_17_pad_0, pad_type = query_states_17_pad_type_0, strides = query_states_17_strides_0, weight = model_model_layers_11_self_attn_q_proj_weight_palettized, x = var_1048)[name = string("query_states_17")];
            string key_states_25_pad_type_0 = const()[name = string("key_states_25_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> key_states_25_strides_0 = const()[name = string("key_states_25_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> key_states_25_pad_0 = const()[name = string("key_states_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> key_states_25_dilations_0 = const()[name = string("key_states_25_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 key_states_25_groups_0 = const()[name = string("key_states_25_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 64]> key_states_25 = conv(dilations = key_states_25_dilations_0, groups = key_states_25_groups_0, pad = key_states_25_pad_0, pad_type = key_states_25_pad_type_0, strides = key_states_25_strides_0, weight = model_model_layers_11_self_attn_k_proj_weight_palettized, x = var_1048)[name = string("key_states_25")];
            string value_states_25_pad_type_0 = const()[name = string("value_states_25_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> value_states_25_strides_0 = const()[name = string("value_states_25_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> value_states_25_pad_0 = const()[name = string("value_states_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> value_states_25_dilations_0 = const()[name = string("value_states_25_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 value_states_25_groups_0 = const()[name = string("value_states_25_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 64]> value_states_25 = conv(dilations = value_states_25_dilations_0, groups = value_states_25_groups_0, pad = value_states_25_pad_0, pad_type = value_states_25_pad_type_0, strides = value_states_25_strides_0, weight = model_model_layers_11_self_attn_v_proj_weight_palettized, x = var_1048)[name = string("value_states_25")];
            tensor<int32, [4]> var_1068 = const()[name = string("op_1068"), val = tensor<int32, [4]>([1, 24, 128, 64])];
            tensor<fp16, [1, 24, 128, 64]> var_1069 = reshape(shape = var_1068, x = query_states_17)[name = string("op_1069")];
            tensor<int32, [4]> var_1070 = const()[name = string("op_1070"), val = tensor<int32, [4]>([0, 1, 3, 2])];
            tensor<int32, [4]> var_1072 = const()[name = string("op_1072"), val = tensor<int32, [4]>([1, 8, 128, 64])];
            tensor<fp16, [1, 8, 128, 64]> var_1073 = reshape(shape = var_1072, x = key_states_25)[name = string("op_1073")];
            tensor<int32, [4]> var_1074 = const()[name = string("op_1074"), val = tensor<int32, [4]>([0, 1, 3, 2])];
            tensor<int32, [4]> var_1076 = const()[name = string("op_1076"), val = tensor<int32, [4]>([1, 8, 128, 64])];
            tensor<fp16, [1, 8, 128, 64]> var_1077 = reshape(shape = var_1076, x = value_states_25)[name = string("op_1077")];
            tensor<int32, [4]> var_1078 = const()[name = string("op_1078"), val = tensor<int32, [4]>([0, 1, 3, 2])];
            tensor<int32, [4]> x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor<int32, [4]>([1, 24, 64, 64])];
            tensor<bool, [4]> x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 24, 64, 128]> x_113 = transpose(perm = var_1070, x = var_1069)[name = string("transpose_19")];
            tensor<fp16, [1, 24, 64, 64]> x1_17 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = x_113)[name = string("x1_17")];
            tensor<int32, [4]> x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor<int32, [4]>([1, 24, 64, 128])];
            tensor<bool, [4]> x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 24, 64, 64]> x2_17 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = x_113)[name = string("x2_17")];
            tensor<fp16, [1, 24, 64, 64]> var_1096 = mul(x = x1_17, y = cos_7)[name = string("op_1096")];
            tensor<fp16, [1, 24, 64, 64]> var_1097 = mul(x = x2_17, y = sin_7)[name = string("op_1097")];
            tensor<fp16, [1, 24, 64, 64]> var_1098 = sub(x = var_1096, y = var_1097)[name = string("op_1098")];
            tensor<fp16, [1, 24, 64, 64]> var_1099 = mul(x = x2_17, y = cos_7)[name = string("op_1099")];
            tensor<fp16, [1, 24, 64, 64]> var_1100 = mul(x = x1_17, y = sin_7)[name = string("op_1100")];
            tensor<fp16, [1, 24, 64, 64]> var_1101 = add(x = var_1099, y = var_1100)[name = string("op_1101")];
            bool rotated_17_interleave_0 = const()[name = string("rotated_17_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 24, 64, 128]> rotated_17 = concat(axis = var_46, interleave = rotated_17_interleave_0, values = (var_1098, var_1101))[name = string("rotated_17")];
            tensor<int32, [4]> x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor<int32, [4]>([1, 8, 64, 64])];
            tensor<bool, [4]> x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 8, 64, 128]> x_117 = transpose(perm = var_1074, x = var_1073)[name = string("transpose_18")];
            tensor<fp16, [1, 8, 64, 64]> x1_19 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = x_117)[name = string("x1_19")];
            tensor<int32, [4]> x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor<int32, [4]>([1, 8, 64, 128])];
            tensor<bool, [4]> x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 8, 64, 64]> x2_19 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = x_117)[name = string("x2_19")];
            tensor<fp16, [1, 8, 64, 64]> var_1117 = mul(x = x1_19, y = cos_7)[name = string("op_1117")];
            tensor<fp16, [1, 8, 64, 64]> var_1118 = mul(x = x2_19, y = sin_7)[name = string("op_1118")];
            tensor<fp16, [1, 8, 64, 64]> var_1119 = sub(x = var_1117, y = var_1118)[name = string("op_1119")];
            tensor<fp16, [1, 8, 64, 64]> var_1120 = mul(x = x2_19, y = cos_7)[name = string("op_1120")];
            tensor<fp16, [1, 8, 64, 64]> var_1121 = mul(x = x1_19, y = sin_7)[name = string("op_1121")];
            tensor<fp16, [1, 8, 64, 64]> var_1122 = add(x = var_1120, y = var_1121)[name = string("op_1122")];
            bool rotated_19_interleave_0 = const()[name = string("rotated_19_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 8, 64, 128]> rotated_19 = concat(axis = var_46, interleave = rotated_19_interleave_0, values = (var_1119, var_1122))[name = string("rotated_19")];
            tensor<int32, [1]> expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor<int32, [1]>([11])];
            tensor<int32, [1]> expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_52 = const()[name = string("expand_dims_52"), val = tensor<int32, [1]>([12])];
            int32 concat_74_axis_0 = const()[name = string("concat_74_axis_0"), val = int32(0)];
            bool concat_74_interleave_0 = const()[name = string("concat_74_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_74 = concat(axis = concat_74_axis_0, interleave = concat_74_interleave_0, values = (expand_dims_48, expand_dims_49, current_pos, expand_dims_51))[name = string("concat_74")];
            tensor<int32, [1]> concat_75_values1_0 = const()[name = string("concat_75_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_75_values3_0 = const()[name = string("concat_75_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)];
            bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (expand_dims_52, concat_75_values1_0, var_343, concat_75_values3_0))[name = string("concat_75")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_9_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_74, begin_mask = model_model_kv_cache_0_internal_tensor_assign_9_begin_mask_0, end = concat_75, end_mask = model_model_kv_cache_0_internal_tensor_assign_9_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_9_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_9_stride_0, update = rotated_19, x = coreml_update_state_21)[name = string("model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_9_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_22_write_state")];
            tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_22 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_22")];
            tensor<int32, [1]> expand_dims_54 = const()[name = string("expand_dims_54"), val = tensor<int32, [1]>([39])];
            tensor<int32, [1]> expand_dims_55 = const()[name = string("expand_dims_55"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_57 = const()[name = string("expand_dims_57"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_58 = const()[name = string("expand_dims_58"), val = tensor<int32, [1]>([40])];
            int32 concat_78_axis_0 = const()[name = string("concat_78_axis_0"), val = int32(0)];
            bool concat_78_interleave_0 = const()[name = string("concat_78_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_78 = concat(axis = concat_78_axis_0, interleave = concat_78_interleave_0, values = (expand_dims_54, expand_dims_55, current_pos, expand_dims_57))[name = string("concat_78")];
            tensor<int32, [1]> concat_79_values1_0 = const()[name = string("concat_79_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_79_values3_0 = const()[name = string("concat_79_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_79_axis_0 = const()[name = string("concat_79_axis_0"), val = int32(0)];
            bool concat_79_interleave_0 = const()[name = string("concat_79_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_79 = concat(axis = concat_79_axis_0, interleave = concat_79_interleave_0, values = (expand_dims_58, concat_79_values1_0, var_343, concat_79_values3_0))[name = string("concat_79")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_10_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [1, 8, 64, 128]> value_states_27 = transpose(perm = var_1078, x = var_1077)[name = string("transpose_17")];
            tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_78, begin_mask = model_model_kv_cache_0_internal_tensor_assign_10_begin_mask_0, end = concat_79, end_mask = model_model_kv_cache_0_internal_tensor_assign_10_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_10_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_10_stride_0, update = value_states_27, x = coreml_update_state_22)[name = string("model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_10_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_23_write_state")];
            tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_23 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_23")];
            tensor<int32, [4]> var_1145_begin_0 = const()[name = string("op_1145_begin_0"), val = tensor<int32, [4]>([11, 0, 0, 0])];
            tensor<int32, [4]> var_1145_end_0 = const()[name = string("op_1145_end_0"), val = tensor<int32, [4]>([12, 8, 1024, 128])];
            tensor<bool, [4]> var_1145_end_mask_0 = const()[name = string("op_1145_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_1145_cast_fp16 = slice_by_index(begin = var_1145_begin_0, end = var_1145_end_0, end_mask = var_1145_end_mask_0, x = coreml_update_state_23)[name = string("op_1145_cast_fp16")];
            tensor<int32, [1]> K_layer_cache_9_axes_0 = const()[name = string("K_layer_cache_9_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> K_layer_cache_9_cast_fp16 = squeeze(axes = K_layer_cache_9_axes_0, x = var_1145_cast_fp16)[name = string("K_layer_cache_9_cast_fp16")];
            tensor<int32, [4]> var_1147_begin_0 = const()[name = string("op_1147_begin_0"), val = tensor<int32, [4]>([39, 0, 0, 0])];
            tensor<int32, [4]> var_1147_end_0 = const()[name = string("op_1147_end_0"), val = tensor<int32, [4]>([40, 8, 1024, 128])];
            tensor<bool, [4]> var_1147_end_mask_0 = const()[name = string("op_1147_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_1147_cast_fp16 = slice_by_index(begin = var_1147_begin_0, end = var_1147_end_0, end_mask = var_1147_end_mask_0, x = coreml_update_state_23)[name = string("op_1147_cast_fp16")];
            tensor<int32, [1]> V_layer_cache_9_axes_0 = const()[name = string("V_layer_cache_9_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> V_layer_cache_9_cast_fp16 = squeeze(axes = V_layer_cache_9_axes_0, x = var_1147_cast_fp16)[name = string("V_layer_cache_9_cast_fp16")];
            tensor<int32, [1]> x_123_axes_0 = const()[name = string("x_123_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_123_cast_fp16 = expand_dims(axes = x_123_axes_0, x = K_layer_cache_9_cast_fp16)[name = string("x_123_cast_fp16")];
            tensor<int32, [4]> var_1156 = const()[name = string("op_1156"), val = tensor<int32, [4]>([1, 3, 1, 1])];
            tensor<fp16, [8, 3, 1024, 128]> x_125_cast_fp16 = tile(reps = var_1156, x = x_123_cast_fp16)[name = string("x_125_cast_fp16")];
            tensor<int32, [4]> var_1160 = const()[name = string("op_1160"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
            tensor<fp16, [1, 24, 1024, 128]> var_1161_cast_fp16 = reshape(shape = var_1160, x = x_125_cast_fp16)[name = string("op_1161_cast_fp16")];
            tensor<int32, [1]> x_129_axes_0 = const()[name = string("x_129_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_129_cast_fp16 = expand_dims(axes = x_129_axes_0, x = V_layer_cache_9_cast_fp16)[name = string("x_129_cast_fp16")];
            tensor<int32, [4]> var_1163 = const()[name = string("op_1163"), val = tensor<int32, [4]>([1, 3, 1, 1])];
            tensor<fp16, [8, 3, 1024, 128]> x_131_cast_fp16 = tile(reps = var_1163, x = x_129_cast_fp16)[name = string("x_131_cast_fp16")];
            bool var_1170_transpose_x_0 = const()[name = string("op_1170_transpose_x_0"), val = bool(false)];
            bool var_1170_transpose_y_0 = const()[name = string("op_1170_transpose_y_0"), val = bool(true)];
            tensor<fp16, [1, 24, 64, 1024]> var_1170_cast_fp16 = matmul(transpose_x = var_1170_transpose_x_0, transpose_y = var_1170_transpose_y_0, x = rotated_17, y = var_1161_cast_fp16)[name = string("op_1170_cast_fp16")];
            fp16 var_1171_to_fp16 = const()[name = string("op_1171_to_fp16"), val = fp16(0x1.6ap-4)];
            tensor<fp16, [1, 24, 64, 1024]> attn_weights_9_cast_fp16 = mul(x = var_1170_cast_fp16, y = var_1171_to_fp16)[name = string("attn_weights_9_cast_fp16")];
            tensor<fp16, [1, 24, 64, 1024]> x_133_cast_fp16 = add(x = attn_weights_9_cast_fp16, y = causal_mask)[name = string("x_133_cast_fp16")];
            tensor<int32, [1]> reduce_max_4_axes_0 = const()[name = string("reduce_max_4_axes_0"), val = tensor<int32, [1]>([-1])];
            bool reduce_max_4_keep_dims_0 = const()[name = string("reduce_max_4_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 24, 64, 1]> reduce_max_4_cast_fp16 = reduce_max(axes = reduce_max_4_axes_0, keep_dims = reduce_max_4_keep_dims_0, x = x_133_cast_fp16)[name = string("reduce_max_4_cast_fp16")];
            tensor<fp16, [1, 24, 64, 1024]> x_135_cast_fp16 = sub(x = x_133_cast_fp16, y = reduce_max_4_cast_fp16)[name = string("x_135_cast_fp16")];
            tensor<fp16, [1, 24, 64, 1024]> exp_x_9_cast_fp16 = exp(x = x_135_cast_fp16)[name = string("exp_x_9_cast_fp16")];
            tensor<int32, [1]> var_1182_axes_0 = const()[name = string("op_1182_axes_0"), val = tensor<int32, [1]>([-1])];
            bool var_1182_keep_dims_0 = const()[name = string("op_1182_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 24, 64, 1]> var_1182_cast_fp16 = reduce_sum(axes = var_1182_axes_0, keep_dims = var_1182_keep_dims_0, x = exp_x_9_cast_fp16)[name = string("op_1182_cast_fp16")];
            tensor<fp16, [1, 24, 64, 1024]> var_1183_cast_fp16 = real_div(x = exp_x_9_cast_fp16, y = var_1182_cast_fp16)[name = string("op_1183_cast_fp16")];
            tensor<int32, [3]> concat_84 = const()[name = string("concat_84"), val = tensor<int32, [3]>([24, 64, 1024])];
            tensor<fp16, [24, 64, 1024]> reshape_12_cast_fp16 = reshape(shape = concat_84, x = var_1183_cast_fp16)[name = string("reshape_12_cast_fp16")];
            tensor<int32, [3]> concat_85 = const()[name = string("concat_85"), val = tensor<int32, [3]>([24, 1024, 128])];
            tensor<fp16, [24, 1024, 128]> reshape_13_cast_fp16 = reshape(shape = concat_85, x = x_131_cast_fp16)[name = string("reshape_13_cast_fp16")];
            bool matmul_4_transpose_x_0 = const()[name = string("matmul_4_transpose_x_0"), val = bool(false)];
            bool matmul_4_transpose_y_0 = const()[name = string("matmul_4_transpose_y_0"), val = bool(false)];
            tensor<fp16, [24, 64, 128]> matmul_4_cast_fp16 = matmul(transpose_x = matmul_4_transpose_x_0, transpose_y = matmul_4_transpose_y_0, x = reshape_12_cast_fp16, y = reshape_13_cast_fp16)[name = string("matmul_4_cast_fp16")];
            tensor<int32, [4]> concat_89 = const()[name = string("concat_89"), val = tensor<int32, [4]>([1, 24, 64, 128])];
            tensor<fp16, [1, 24, 64, 128]> reshape_14_cast_fp16 = reshape(shape = concat_89, x = matmul_4_cast_fp16)[name = string("reshape_14_cast_fp16")];
            tensor<int32, [4]> var_1186_perm_0 = const()[name = string("op_1186_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_1188 = const()[name = string("op_1188"), val = tensor<int32, [3]>([1, 64, 3072])];
            tensor<fp16, [1, 64, 24, 128]> var_1186_cast_fp16 = transpose(perm = var_1186_perm_0, x = reshape_14_cast_fp16)[name = string("transpose_16")];
            tensor<fp16, [1, 64, 3072]> input_61_cast_fp16 = reshape(shape = var_1188, x = var_1186_cast_fp16)[name = string("input_61_cast_fp16")];
            tensor<fp16, [3072, 3072]> model_model_layers_11_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755305024))), lut = tensor<fp16, [384, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(764742272))))[name = string("model_model_layers_11_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
            tensor<fp16, [1, 64, 3072]> linear_4_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_11_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_61_cast_fp16)[name = string("linear_4_cast_fp16")];
            tensor<fp16, [1, 64, 3072]> hidden_states_37_cast_fp16 = add(x = hidden_states_33_cast_fp16, y = linear_4_cast_fp16)[name = string("hidden_states_37_cast_fp16")];
            tensor<int32, [1]> mean_19_axes_0 = const()[name = string("mean_19_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_19_keep_dims_0 = const()[name = string("mean_19_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 64, 1]> mean_19_cast_fp16 = reduce_mean(axes = mean_19_axes_0, keep_dims = mean_19_keep_dims_0, x = hidden_states_37_cast_fp16)[name = string("mean_19_cast_fp16")];
            tensor<fp16, [1, 64, 3072]> input_63_cast_fp16 = sub(x = hidden_states_37_cast_fp16, y = mean_19_cast_fp16)[name = string("input_63_cast_fp16")];
            tensor<int32, [1]> var_1199_axes_0 = const()[name = string("op_1199_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [3072]> model_model_layers_11_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(764938944)))];
            tensor<fp16, [1, 64, 3072]> var_1199_cast_fp16 = layer_norm(axes = var_1199_axes_0, epsilon = var_48_to_fp16, gamma = model_model_layers_11_post_attention_layernorm_weight_to_fp16, x = input_63_cast_fp16)[name = string("op_1199_cast_fp16")];
            tensor<int32, [3]> var_1206 = const()[name = string("op_1206"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> input_65_axes_0 = const()[name = string("input_65_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 64]> var_1207 = transpose(perm = var_1206, x = var_1199_cast_fp16)[name = string("transpose_15")];
            tensor<fp16, [1, 3072, 1, 64]> input_65 = expand_dims(axes = input_65_axes_0, x = var_1207)[name = string("input_65")];
            string input_67_pad_type_0 = const()[name = string("input_67_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> input_67_strides_0 = const()[name = string("input_67_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> input_67_pad_0 = const()[name = string("input_67_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> input_67_dilations_0 = const()[name = string("input_67_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 input_67_groups_0 = const()[name = string("input_67_groups_0"), val = int32(1)];
            tensor<fp16, [1, 8192, 1, 64]> input_67 = conv(dilations = input_67_dilations_0, groups = input_67_groups_0, pad = input_67_pad_0, pad_type = input_67_pad_type_0, strides = input_67_strides_0, weight = model_model_layers_11_mlp_gate_proj_weight_palettized, x = input_65)[name = string("input_67")];
            string up_states_9_pad_type_0 = const()[name = string("up_states_9_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> up_states_9_strides_0 = const()[name = string("up_states_9_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> up_states_9_pad_0 = const()[name = string("up_states_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> up_states_9_dilations_0 = const()[name = string("up_states_9_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 up_states_9_groups_0 = const()[name = string("up_states_9_groups_0"), val = int32(1)];
            tensor<fp16, [1, 8192, 1, 64]> up_states_9 = conv(dilations = up_states_9_dilations_0, groups = up_states_9_groups_0, pad = up_states_9_pad_0, pad_type = up_states_9_pad_type_0, strides = up_states_9_strides_0, weight = model_model_layers_11_mlp_up_proj_weight_palettized, x = input_65)[name = string("up_states_9")];
            tensor<fp16, [1, 8192, 1, 64]> gate_states_9 = silu(x = input_67)[name = string("gate_states_9")];
            tensor<fp16, [1, 8192, 1, 64]> input_69 = mul(x = gate_states_9, y = up_states_9)[name = string("input_69")];
            string hidden_states_39_pad_type_0 = const()[name = string("hidden_states_39_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> hidden_states_39_strides_0 = const()[name = string("hidden_states_39_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> hidden_states_39_pad_0 = const()[name = string("hidden_states_39_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> hidden_states_39_dilations_0 = const()[name = string("hidden_states_39_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 hidden_states_39_groups_0 = const()[name = string("hidden_states_39_groups_0"), val = int32(1)];
            tensor<fp16, [1, 3072, 1, 64]> hidden_states_39 = conv(dilations = hidden_states_39_dilations_0, groups = hidden_states_39_groups_0, pad = hidden_states_39_pad_0, pad_type = hidden_states_39_pad_type_0, strides = hidden_states_39_strides_0, weight = model_model_layers_11_mlp_down_proj_weight_palettized, x = input_69)[name = string("hidden_states_39")];
            tensor<int32, [1]> var_1229_axes_0 = const()[name = string("op_1229_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 64]> var_1229 = squeeze(axes = var_1229_axes_0, x = hidden_states_39)[name = string("op_1229")];
            tensor<int32, [3]> var_1230 = const()[name = string("op_1230"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 64, 3072]> var_1231 = transpose(perm = var_1230, x = var_1229)[name = string("transpose_14")];
            tensor<fp16, [1, 64, 3072]> hidden_states_41_cast_fp16 = add(x = hidden_states_37_cast_fp16, y = var_1231)[name = string("hidden_states_41_cast_fp16")];
            tensor<int32, [1]> mean_21_axes_0 = const()[name = string("mean_21_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_21_keep_dims_0 = const()[name = string("mean_21_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 64, 1]> mean_21_cast_fp16 = reduce_mean(axes = mean_21_axes_0, keep_dims = mean_21_keep_dims_0, x = hidden_states_41_cast_fp16)[name = string("mean_21_cast_fp16")];
            tensor<fp16, [1, 64, 3072]> input_71_cast_fp16 = sub(x = hidden_states_41_cast_fp16, y = mean_21_cast_fp16)[name = string("input_71_cast_fp16")];
            tensor<int32, [1]> var_1239_axes_0 = const()[name = string("op_1239_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [3072]> model_model_layers_12_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_input_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(764945152)))];
            tensor<fp16, [1, 64, 3072]> var_1239_cast_fp16 = layer_norm(axes = var_1239_axes_0, epsilon = var_48_to_fp16, gamma = model_model_layers_12_input_layernorm_weight_to_fp16, x = input_71_cast_fp16)[name = string("op_1239_cast_fp16")];
            tensor<int32, [3]> var_1243 = const()[name = string("op_1243"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> var_1245_axes_0 = const()[name = string("op_1245_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 64]> var_1244 = transpose(perm = var_1243, x = var_1239_cast_fp16)[name = string("transpose_13")];
            tensor<fp16, [1, 3072, 1, 64]> var_1245 = expand_dims(axes = var_1245_axes_0, x = var_1244)[name = string("op_1245")];
            string query_states_21_pad_type_0 = const()[name = string("query_states_21_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> query_states_21_strides_0 = const()[name = string("query_states_21_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> query_states_21_pad_0 = const()[name = string("query_states_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> query_states_21_dilations_0 = const()[name = string("query_states_21_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 query_states_21_groups_0 = const()[name = string("query_states_21_groups_0"), val = int32(1)];
            tensor<fp16, [1, 3072, 1, 64]> query_states_21 = conv(dilations = query_states_21_dilations_0, groups = query_states_21_groups_0, pad = query_states_21_pad_0, pad_type = query_states_21_pad_type_0, strides = query_states_21_strides_0, weight = model_model_layers_12_self_attn_q_proj_weight_palettized, x = var_1245)[name = string("query_states_21")];
            string key_states_31_pad_type_0 = const()[name = string("key_states_31_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> key_states_31_strides_0 = const()[name = string("key_states_31_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> key_states_31_pad_0 = const()[name = string("key_states_31_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> key_states_31_dilations_0 = const()[name = string("key_states_31_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 key_states_31_groups_0 = const()[name = string("key_states_31_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 64]> key_states_31 = conv(dilations = key_states_31_dilations_0, groups = key_states_31_groups_0, pad = key_states_31_pad_0, pad_type = key_states_31_pad_type_0, strides = key_states_31_strides_0, weight = model_model_layers_12_self_attn_k_proj_weight_palettized, x = var_1245)[name = string("key_states_31")];
            string value_states_31_pad_type_0 = const()[name = string("value_states_31_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> value_states_31_strides_0 = const()[name = string("value_states_31_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> value_states_31_pad_0 = const()[name = string("value_states_31_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> value_states_31_dilations_0 = const()[name = string("value_states_31_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 value_states_31_groups_0 = const()[name = string("value_states_31_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 64]> value_states_31 = conv(dilations = value_states_31_dilations_0, groups = value_states_31_groups_0, pad = value_states_31_pad_0, pad_type = value_states_31_pad_type_0, strides = value_states_31_strides_0, weight = model_model_layers_12_self_attn_v_proj_weight_palettized, x = var_1245)[name = string("value_states_31")];
            tensor<int32, [4]> var_1265 = const()[name = string("op_1265"), val = tensor<int32, [4]>([1, 24, 128, 64])];
            tensor<fp16, [1, 24, 128, 64]> var_1266 = reshape(shape = var_1265, x = query_states_21)[name = string("op_1266")];
            tensor<int32, [4]> var_1267 = const()[name = string("op_1267"), val = tensor<int32, [4]>([0, 1, 3, 2])];
            tensor<int32, [4]> var_1269 = const()[name = string("op_1269"), val = tensor<int32, [4]>([1, 8, 128, 64])];
            tensor<fp16, [1, 8, 128, 64]> var_1270 = reshape(shape = var_1269, x = key_states_31)[name = string("op_1270")];
            tensor<int32, [4]> var_1271 = const()[name = string("op_1271"), val = tensor<int32, [4]>([0, 1, 3, 2])];
            tensor<int32, [4]> var_1273 = const()[name = string("op_1273"), val = tensor<int32, [4]>([1, 8, 128, 64])];
            tensor<fp16, [1, 8, 128, 64]> var_1274 = reshape(shape = var_1273, x = value_states_31)[name = string("op_1274")];
            tensor<int32, [4]> var_1275 = const()[name = string("op_1275"), val = tensor<int32, [4]>([0, 1, 3, 2])];
            tensor<int32, [4]> x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor<int32, [4]>([1, 24, 64, 64])];
            tensor<bool, [4]> x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 24, 64, 128]> x_141 = transpose(perm = var_1267, x = var_1266)[name = string("transpose_12")];
            tensor<fp16, [1, 24, 64, 64]> x1_21 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = x_141)[name = string("x1_21")];
            tensor<int32, [4]> x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor<int32, [4]>([1, 24, 64, 128])];
            tensor<bool, [4]> x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 24, 64, 64]> x2_21 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = x_141)[name = string("x2_21")];
            tensor<fp16, [1, 24, 64, 64]> var_1293 = mul(x = x1_21, y = cos_7)[name = string("op_1293")];
            tensor<fp16, [1, 24, 64, 64]> var_1294 = mul(x = x2_21, y = sin_7)[name = string("op_1294")];
            tensor<fp16, [1, 24, 64, 64]> var_1295 = sub(x = var_1293, y = var_1294)[name = string("op_1295")];
            tensor<fp16, [1, 24, 64, 64]> var_1296 = mul(x = x2_21, y = cos_7)[name = string("op_1296")];
            tensor<fp16, [1, 24, 64, 64]> var_1297 = mul(x = x1_21, y = sin_7)[name = string("op_1297")];
            tensor<fp16, [1, 24, 64, 64]> var_1298 = add(x = var_1296, y = var_1297)[name = string("op_1298")];
            bool rotated_21_interleave_0 = const()[name = string("rotated_21_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 24, 64, 128]> rotated_21 = concat(axis = var_46, interleave = rotated_21_interleave_0, values = (var_1295, var_1298))[name = string("rotated_21")];
            tensor<int32, [4]> x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor<int32, [4]>([1, 8, 64, 64])];
            tensor<bool, [4]> x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 8, 64, 128]> x_145 = transpose(perm = var_1271, x = var_1270)[name = string("transpose_11")];
            tensor<fp16, [1, 8, 64, 64]> x1_23 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = x_145)[name = string("x1_23")];
            tensor<int32, [4]> x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor<int32, [4]>([1, 8, 64, 128])];
            tensor<bool, [4]> x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 8, 64, 64]> x2_23 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = x_145)[name = string("x2_23")];
            tensor<fp16, [1, 8, 64, 64]> var_1314 = mul(x = x1_23, y = cos_7)[name = string("op_1314")];
            tensor<fp16, [1, 8, 64, 64]> var_1315 = mul(x = x2_23, y = sin_7)[name = string("op_1315")];
            tensor<fp16, [1, 8, 64, 64]> var_1316 = sub(x = var_1314, y = var_1315)[name = string("op_1316")];
            tensor<fp16, [1, 8, 64, 64]> var_1317 = mul(x = x2_23, y = cos_7)[name = string("op_1317")];
            tensor<fp16, [1, 8, 64, 64]> var_1318 = mul(x = x1_23, y = sin_7)[name = string("op_1318")];
            tensor<fp16, [1, 8, 64, 64]> var_1319 = add(x = var_1317, y = var_1318)[name = string("op_1319")];
            bool rotated_23_interleave_0 = const()[name = string("rotated_23_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 8, 64, 128]> rotated_23 = concat(axis = var_46, interleave = rotated_23_interleave_0, values = (var_1316, var_1319))[name = string("rotated_23")];
            tensor<int32, [1]> expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor<int32, [1]>([12])];
            tensor<int32, [1]> expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_64 = const()[name = string("expand_dims_64"), val = tensor<int32, [1]>([13])];
            int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)];
            bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (expand_dims_60, expand_dims_61, current_pos, expand_dims_63))[name = string("concat_92")];
            tensor<int32, [1]> concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_93_values3_0 = const()[name = string("concat_93_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)];
            bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (expand_dims_64, concat_93_values1_0, var_343, concat_93_values3_0))[name = string("concat_93")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_11_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_92, begin_mask = model_model_kv_cache_0_internal_tensor_assign_11_begin_mask_0, end = concat_93, end_mask = model_model_kv_cache_0_internal_tensor_assign_11_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_11_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_11_stride_0, update = rotated_23, x = coreml_update_state_23)[name = string("model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_11_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_24_write_state")];
            tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_24 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_24")];
            tensor<int32, [1]> expand_dims_66 = const()[name = string("expand_dims_66"), val = tensor<int32, [1]>([40])];
            tensor<int32, [1]> expand_dims_67 = const()[name = string("expand_dims_67"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_69 = const()[name = string("expand_dims_69"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_70 = const()[name = string("expand_dims_70"), val = tensor<int32, [1]>([41])];
            int32 concat_96_axis_0 = const()[name = string("concat_96_axis_0"), val = int32(0)];
            bool concat_96_interleave_0 = const()[name = string("concat_96_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_96 = concat(axis = concat_96_axis_0, interleave = concat_96_interleave_0, values = (expand_dims_66, expand_dims_67, current_pos, expand_dims_69))[name = string("concat_96")];
            tensor<int32, [1]> concat_97_values1_0 = const()[name = string("concat_97_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_97_values3_0 = const()[name = string("concat_97_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_97_axis_0 = const()[name = string("concat_97_axis_0"), val = int32(0)];
            bool concat_97_interleave_0 = const()[name = string("concat_97_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_97 = concat(axis = concat_97_axis_0, interleave = concat_97_interleave_0, values = (expand_dims_70, concat_97_values1_0, var_343, concat_97_values3_0))[name = string("concat_97")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_12_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [1, 8, 64, 128]> value_states_33 = transpose(perm = var_1275, x = var_1274)[name = string("transpose_10")];
            tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_96, begin_mask = model_model_kv_cache_0_internal_tensor_assign_12_begin_mask_0, end = concat_97, end_mask = model_model_kv_cache_0_internal_tensor_assign_12_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_12_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_12_stride_0, update = value_states_33, x = coreml_update_state_24)[name = string("model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_12_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_25_write_state")];
            tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_25 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_25")];
            tensor<int32, [4]> var_1342_begin_0 = const()[name = string("op_1342_begin_0"), val = tensor<int32, [4]>([12, 0, 0, 0])];
            tensor<int32, [4]> var_1342_end_0 = const()[name = string("op_1342_end_0"), val = tensor<int32, [4]>([13, 8, 1024, 128])];
            tensor<bool, [4]> var_1342_end_mask_0 = const()[name = string("op_1342_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_1342_cast_fp16 = slice_by_index(begin = var_1342_begin_0, end = var_1342_end_0, end_mask = var_1342_end_mask_0, x = coreml_update_state_25)[name = string("op_1342_cast_fp16")];
            tensor<int32, [1]> K_layer_cache_11_axes_0 = const()[name = string("K_layer_cache_11_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> K_layer_cache_11_cast_fp16 = squeeze(axes = K_layer_cache_11_axes_0, x = var_1342_cast_fp16)[name = string("K_layer_cache_11_cast_fp16")];
            tensor<int32, [4]> var_1344_begin_0 = const()[name = string("op_1344_begin_0"), val = tensor<int32, [4]>([40, 0, 0, 0])];
            tensor<int32, [4]> var_1344_end_0 = const()[name = string("op_1344_end_0"), val = tensor<int32, [4]>([41, 8, 1024, 128])];
            tensor<bool, [4]> var_1344_end_mask_0 = const()[name = string("op_1344_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_1344_cast_fp16 = slice_by_index(begin = var_1344_begin_0, end = var_1344_end_0, end_mask = var_1344_end_mask_0, x = coreml_update_state_25)[name = string("op_1344_cast_fp16")];
            tensor<int32, [1]> V_layer_cache_11_axes_0 = const()[name = string("V_layer_cache_11_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> V_layer_cache_11_cast_fp16 = squeeze(axes = V_layer_cache_11_axes_0, x = var_1344_cast_fp16)[name = string("V_layer_cache_11_cast_fp16")];
            tensor<int32, [1]> x_151_axes_0 = const()[name = string("x_151_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_151_cast_fp16 = expand_dims(axes = x_151_axes_0, x = K_layer_cache_11_cast_fp16)[name = string("x_151_cast_fp16")];
            tensor<int32, [4]> var_1353 = const()[name = string("op_1353"), val = tensor<int32, [4]>([1, 3, 1, 1])];
            tensor<fp16, [8, 3, 1024, 128]> x_153_cast_fp16 = tile(reps = var_1353, x = x_151_cast_fp16)[name = string("x_153_cast_fp16")];
            tensor<int32, [4]> var_1357 = const()[name = string("op_1357"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
            tensor<fp16, [1, 24, 1024, 128]> var_1358_cast_fp16 = reshape(shape = var_1357, x = x_153_cast_fp16)[name = string("op_1358_cast_fp16")];
            tensor<int32, [1]> x_157_axes_0 = const()[name = string("x_157_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_157_cast_fp16 = expand_dims(axes = x_157_axes_0, x = V_layer_cache_11_cast_fp16)[name = string("x_157_cast_fp16")];
            tensor<int32, [4]> var_1360 = const()[name = string("op_1360"), val = tensor<int32, [4]>([1, 3, 1, 1])];
            tensor<fp16, [8, 3, 1024, 128]> x_159_cast_fp16 = tile(reps = var_1360, x = x_157_cast_fp16)[name = string("x_159_cast_fp16")];
            bool var_1367_transpose_x_0 = const()[name = string("op_1367_transpose_x_0"), val = bool(false)];
            bool var_1367_transpose_y_0 = const()[name = string("op_1367_transpose_y_0"), val = bool(true)];
            tensor<fp16, [1, 24, 64, 1024]> var_1367_cast_fp16 = matmul(transpose_x = var_1367_transpose_x_0, transpose_y = var_1367_transpose_y_0, x = rotated_21, y = var_1358_cast_fp16)[name = string("op_1367_cast_fp16")];
            fp16 var_1368_to_fp16 = const()[name = string("op_1368_to_fp16"), val = fp16(0x1.6ap-4)];
            tensor<fp16, [1, 24, 64, 1024]> attn_weights_11_cast_fp16 = mul(x = var_1367_cast_fp16, y = var_1368_to_fp16)[name = string("attn_weights_11_cast_fp16")];
            tensor<fp16, [1, 24, 64, 1024]> x_161_cast_fp16 = add(x = attn_weights_11_cast_fp16, y = causal_mask)[name = string("x_161_cast_fp16")];
            tensor<int32, [1]> reduce_max_5_axes_0 = const()[name = string("reduce_max_5_axes_0"), val = tensor<int32, [1]>([-1])];
            bool reduce_max_5_keep_dims_0 = const()[name = string("reduce_max_5_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 24, 64, 1]> reduce_max_5_cast_fp16 = reduce_max(axes = reduce_max_5_axes_0, keep_dims = reduce_max_5_keep_dims_0, x = x_161_cast_fp16)[name = string("reduce_max_5_cast_fp16")];
            tensor<fp16, [1, 24, 64, 1024]> x_163_cast_fp16 = sub(x = x_161_cast_fp16, y = reduce_max_5_cast_fp16)[name = string("x_163_cast_fp16")];
            tensor<fp16, [1, 24, 64, 1024]> exp_x_11_cast_fp16 = exp(x = x_163_cast_fp16)[name = string("exp_x_11_cast_fp16")];
            tensor<int32, [1]> var_1379_axes_0 = const()[name = string("op_1379_axes_0"), val = tensor<int32, [1]>([-1])];
            bool var_1379_keep_dims_0 = const()[name = string("op_1379_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 24, 64, 1]> var_1379_cast_fp16 = reduce_sum(axes = var_1379_axes_0, keep_dims = var_1379_keep_dims_0, x = exp_x_11_cast_fp16)[name = string("op_1379_cast_fp16")];
            tensor<fp16, [1, 24, 64, 1024]> var_1380_cast_fp16 = real_div(x = exp_x_11_cast_fp16, y = var_1379_cast_fp16)[name = string("op_1380_cast_fp16")];
            tensor<int32, [3]> concat_102 = const()[name = string("concat_102"), val = tensor<int32, [3]>([24, 64, 1024])];
            tensor<fp16, [24, 64, 1024]> reshape_15_cast_fp16 = reshape(shape = concat_102, x = var_1380_cast_fp16)[name = string("reshape_15_cast_fp16")];
            tensor<int32, [3]> concat_103 = const()[name = string("concat_103"), val = tensor<int32, [3]>([24, 1024, 128])];
            tensor<fp16, [24, 1024, 128]> reshape_16_cast_fp16 = reshape(shape = concat_103, x = x_159_cast_fp16)[name = string("reshape_16_cast_fp16")];
            bool matmul_5_transpose_x_0 = const()[name = string("matmul_5_transpose_x_0"), val = bool(false)];
            bool matmul_5_transpose_y_0 = const()[name = string("matmul_5_transpose_y_0"), val = bool(false)];
            tensor<fp16, [24, 64, 128]> matmul_5_cast_fp16 = matmul(transpose_x = matmul_5_transpose_x_0, transpose_y = matmul_5_transpose_y_0, x = reshape_15_cast_fp16, y = reshape_16_cast_fp16)[name = string("matmul_5_cast_fp16")];
            tensor<int32, [4]> concat_107 = const()[name = string("concat_107"), val = tensor<int32, [4]>([1, 24, 64, 128])];
            tensor<fp16, [1, 24, 64, 128]> reshape_17_cast_fp16 = reshape(shape = concat_107, x = matmul_5_cast_fp16)[name = string("reshape_17_cast_fp16")];
            tensor<int32, [4]> var_1383_perm_0 = const()[name = string("op_1383_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_1385 = const()[name = string("op_1385"), val = tensor<int32, [3]>([1, 64, 3072])];
            tensor<fp16, [1, 64, 24, 128]> var_1383_cast_fp16 = transpose(perm = var_1383_perm_0, x = reshape_17_cast_fp16)[name = string("transpose_9")];
            tensor<fp16, [1, 64, 3072]> input_75_cast_fp16 = reshape(shape = var_1385, x = var_1383_cast_fp16)[name = string("input_75_cast_fp16")];
            tensor<fp16, [3072, 3072]> model_model_layers_12_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(764951360))), lut = tensor<fp16, [384, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774388608))))[name = string("model_model_layers_12_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
            tensor<fp16, [1, 64, 3072]> linear_5_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_12_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_75_cast_fp16)[name = string("linear_5_cast_fp16")];
            tensor<fp16, [1, 64, 3072]> hidden_states_45_cast_fp16 = add(x = hidden_states_41_cast_fp16, y = linear_5_cast_fp16)[name = string("hidden_states_45_cast_fp16")];
            tensor<int32, [1]> mean_23_axes_0 = const()[name = string("mean_23_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_23_keep_dims_0 = const()[name = string("mean_23_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 64, 1]> mean_23_cast_fp16 = reduce_mean(axes = mean_23_axes_0, keep_dims = mean_23_keep_dims_0, x = hidden_states_45_cast_fp16)[name = string("mean_23_cast_fp16")];
            tensor<fp16, [1, 64, 3072]> input_77_cast_fp16 = sub(x = hidden_states_45_cast_fp16, y = mean_23_cast_fp16)[name = string("input_77_cast_fp16")];
            tensor<int32, [1]> var_1396_axes_0 = const()[name = string("op_1396_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [3072]> model_model_layers_12_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774585280)))];
            tensor<fp16, [1, 64, 3072]> var_1396_cast_fp16 = layer_norm(axes = var_1396_axes_0, epsilon = var_48_to_fp16, gamma = model_model_layers_12_post_attention_layernorm_weight_to_fp16, x = input_77_cast_fp16)[name = string("op_1396_cast_fp16")];
            tensor<int32, [3]> var_1403 = const()[name = string("op_1403"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> input_79_axes_0 = const()[name = string("input_79_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 64]> var_1404 = transpose(perm = var_1403, x = var_1396_cast_fp16)[name = string("transpose_8")];
            tensor<fp16, [1, 3072, 1, 64]> input_79 = expand_dims(axes = input_79_axes_0, x = var_1404)[name = string("input_79")];
            string input_81_pad_type_0 = const()[name = string("input_81_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> input_81_strides_0 = const()[name = string("input_81_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> input_81_pad_0 = const()[name = string("input_81_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> input_81_dilations_0 = const()[name = string("input_81_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 input_81_groups_0 = const()[name = string("input_81_groups_0"), val = int32(1)];
            tensor<fp16, [1, 8192, 1, 64]> input_81 = conv(dilations = input_81_dilations_0, groups = input_81_groups_0, pad = input_81_pad_0, pad_type = input_81_pad_type_0, strides = input_81_strides_0, weight = model_model_layers_12_mlp_gate_proj_weight_palettized, x = input_79)[name = string("input_81")];
            string up_states_11_pad_type_0 = const()[name = string("up_states_11_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> up_states_11_strides_0 = const()[name = string("up_states_11_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> up_states_11_pad_0 = const()[name = string("up_states_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> up_states_11_dilations_0 = const()[name = string("up_states_11_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 up_states_11_groups_0 = const()[name = string("up_states_11_groups_0"), val = int32(1)];
            tensor<fp16, [1, 8192, 1, 64]> up_states_11 = conv(dilations = up_states_11_dilations_0, groups = up_states_11_groups_0, pad = up_states_11_pad_0, pad_type = up_states_11_pad_type_0, strides = up_states_11_strides_0, weight = model_model_layers_12_mlp_up_proj_weight_palettized, x = input_79)[name = string("up_states_11")];
            tensor<fp16, [1, 8192, 1, 64]> gate_states_11 = silu(x = input_81)[name = string("gate_states_11")];
            tensor<fp16, [1, 8192, 1, 64]> input_83 = mul(x = gate_states_11, y = up_states_11)[name = string("input_83")];
            string hidden_states_47_pad_type_0 = const()[name = string("hidden_states_47_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> hidden_states_47_strides_0 = const()[name = string("hidden_states_47_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> hidden_states_47_pad_0 = const()[name = string("hidden_states_47_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> hidden_states_47_dilations_0 = const()[name = string("hidden_states_47_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 hidden_states_47_groups_0 = const()[name = string("hidden_states_47_groups_0"), val = int32(1)];
            tensor<fp16, [1, 3072, 1, 64]> hidden_states_47 = conv(dilations = hidden_states_47_dilations_0, groups = hidden_states_47_groups_0, pad = hidden_states_47_pad_0, pad_type = hidden_states_47_pad_type_0, strides = hidden_states_47_strides_0, weight = model_model_layers_12_mlp_down_proj_weight_palettized, x = input_83)[name = string("hidden_states_47")];
            tensor<int32, [1]> var_1426_axes_0 = const()[name = string("op_1426_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 64]> var_1426 = squeeze(axes = var_1426_axes_0, x = hidden_states_47)[name = string("op_1426")];
            tensor<int32, [3]> var_1427 = const()[name = string("op_1427"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 64, 3072]> var_1428 = transpose(perm = var_1427, x = var_1426)[name = string("transpose_7")];
            tensor<fp16, [1, 64, 3072]> hidden_states_49_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = var_1428)[name = string("hidden_states_49_cast_fp16")];
            tensor<int32, [1]> mean_25_axes_0 = const()[name = string("mean_25_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_25_keep_dims_0 = const()[name = string("mean_25_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 64, 1]> mean_25_cast_fp16 = reduce_mean(axes = mean_25_axes_0, keep_dims = mean_25_keep_dims_0, x = hidden_states_49_cast_fp16)[name = string("mean_25_cast_fp16")];
            tensor<fp16, [1, 64, 3072]> input_85_cast_fp16 = sub(x = hidden_states_49_cast_fp16, y = mean_25_cast_fp16)[name = string("input_85_cast_fp16")];
            tensor<int32, [1]> var_1436_axes_0 = const()[name = string("op_1436_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [3072]> model_model_layers_13_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_input_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774591488)))];
            tensor<fp16, [1, 64, 3072]> var_1436_cast_fp16 = layer_norm(axes = var_1436_axes_0, epsilon = var_48_to_fp16, gamma = model_model_layers_13_input_layernorm_weight_to_fp16, x = input_85_cast_fp16)[name = string("op_1436_cast_fp16")];
            tensor<int32, [3]> var_1440 = const()[name = string("op_1440"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> var_1442_axes_0 = const()[name = string("op_1442_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 64]> var_1441 = transpose(perm = var_1440, x = var_1436_cast_fp16)[name = string("transpose_6")];
            tensor<fp16, [1, 3072, 1, 64]> var_1442 = expand_dims(axes = var_1442_axes_0, x = var_1441)[name = string("op_1442")];
            string query_states_25_pad_type_0 = const()[name = string("query_states_25_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> query_states_25_strides_0 = const()[name = string("query_states_25_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> query_states_25_pad_0 = const()[name = string("query_states_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> query_states_25_dilations_0 = const()[name = string("query_states_25_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 query_states_25_groups_0 = const()[name = string("query_states_25_groups_0"), val = int32(1)];
            tensor<fp16, [1, 3072, 1, 64]> query_states_25 = conv(dilations = query_states_25_dilations_0, groups = query_states_25_groups_0, pad = query_states_25_pad_0, pad_type = query_states_25_pad_type_0, strides = query_states_25_strides_0, weight = model_model_layers_13_self_attn_q_proj_weight_palettized, x = var_1442)[name = string("query_states_25")];
            string key_states_37_pad_type_0 = const()[name = string("key_states_37_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> key_states_37_strides_0 = const()[name = string("key_states_37_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> key_states_37_pad_0 = const()[name = string("key_states_37_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> key_states_37_dilations_0 = const()[name = string("key_states_37_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 key_states_37_groups_0 = const()[name = string("key_states_37_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 64]> key_states_37 = conv(dilations = key_states_37_dilations_0, groups = key_states_37_groups_0, pad = key_states_37_pad_0, pad_type = key_states_37_pad_type_0, strides = key_states_37_strides_0, weight = model_model_layers_13_self_attn_k_proj_weight_palettized, x = var_1442)[name = string("key_states_37")];
            string value_states_37_pad_type_0 = const()[name = string("value_states_37_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> value_states_37_strides_0 = const()[name = string("value_states_37_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> value_states_37_pad_0 = const()[name = string("value_states_37_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> value_states_37_dilations_0 = const()[name = string("value_states_37_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 value_states_37_groups_0 = const()[name = string("value_states_37_groups_0"), val = int32(1)];
            tensor<fp16, [1, 1024, 1, 64]> value_states_37 = conv(dilations = value_states_37_dilations_0, groups = value_states_37_groups_0, pad = value_states_37_pad_0, pad_type = value_states_37_pad_type_0, strides = value_states_37_strides_0, weight = model_model_layers_13_self_attn_v_proj_weight_palettized, x = var_1442)[name = string("value_states_37")];
            tensor<int32, [4]> var_1462 = const()[name = string("op_1462"), val = tensor<int32, [4]>([1, 24, 128, 64])];
            tensor<fp16, [1, 24, 128, 64]> var_1463 = reshape(shape = var_1462, x = query_states_25)[name = string("op_1463")];
            tensor<int32, [4]> var_1464 = const()[name = string("op_1464"), val = tensor<int32, [4]>([0, 1, 3, 2])];
            tensor<int32, [4]> var_1466 = const()[name = string("op_1466"), val = tensor<int32, [4]>([1, 8, 128, 64])];
            tensor<fp16, [1, 8, 128, 64]> var_1467 = reshape(shape = var_1466, x = key_states_37)[name = string("op_1467")];
            tensor<int32, [4]> var_1468 = const()[name = string("op_1468"), val = tensor<int32, [4]>([0, 1, 3, 2])];
            tensor<int32, [4]> var_1470 = const()[name = string("op_1470"), val = tensor<int32, [4]>([1, 8, 128, 64])];
            tensor<fp16, [1, 8, 128, 64]> var_1471 = reshape(shape = var_1470, x = value_states_37)[name = string("op_1471")];
            tensor<int32, [4]> var_1472 = const()[name = string("op_1472"), val = tensor<int32, [4]>([0, 1, 3, 2])];
            tensor<int32, [4]> x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor<int32, [4]>([1, 24, 64, 64])];
            tensor<bool, [4]> x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 24, 64, 128]> x_169 = transpose(perm = var_1464, x = var_1463)[name = string("transpose_5")];
            tensor<fp16, [1, 24, 64, 64]> x1_25 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = x_169)[name = string("x1_25")];
            tensor<int32, [4]> x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor<int32, [4]>([1, 24, 64, 128])];
            tensor<bool, [4]> x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 24, 64, 64]> x2_25 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = x_169)[name = string("x2_25")];
            tensor<fp16, [1, 24, 64, 64]> var_1490 = mul(x = x1_25, y = cos_7)[name = string("op_1490")];
            tensor<fp16, [1, 24, 64, 64]> var_1491 = mul(x = x2_25, y = sin_7)[name = string("op_1491")];
            tensor<fp16, [1, 24, 64, 64]> var_1492 = sub(x = var_1490, y = var_1491)[name = string("op_1492")];
            tensor<fp16, [1, 24, 64, 64]> var_1493 = mul(x = x2_25, y = cos_7)[name = string("op_1493")];
            tensor<fp16, [1, 24, 64, 64]> var_1494 = mul(x = x1_25, y = sin_7)[name = string("op_1494")];
            tensor<fp16, [1, 24, 64, 64]> var_1495 = add(x = var_1493, y = var_1494)[name = string("op_1495")];
            bool rotated_25_interleave_0 = const()[name = string("rotated_25_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 24, 64, 128]> rotated_25 = concat(axis = var_46, interleave = rotated_25_interleave_0, values = (var_1492, var_1495))[name = string("rotated_25")];
            tensor<int32, [4]> x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [4]> x1_end_0 = const()[name = string("x1_end_0"), val = tensor<int32, [4]>([1, 8, 64, 64])];
            tensor<bool, [4]> x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor<bool, [4]>([true, true, true, false])];
            tensor<fp16, [1, 8, 64, 128]> x_173 = transpose(perm = var_1468, x = var_1467)[name = string("transpose_4")];
            tensor<fp16, [1, 8, 64, 64]> x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = x_173)[name = string("x1")];
            tensor<int32, [4]> x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 64])];
            tensor<int32, [4]> x2_end_0 = const()[name = string("x2_end_0"), val = tensor<int32, [4]>([1, 8, 64, 128])];
            tensor<bool, [4]> x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor<bool, [4]>([true, true, true, true])];
            tensor<fp16, [1, 8, 64, 64]> x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = x_173)[name = string("x2")];
            tensor<fp16, [1, 8, 64, 64]> var_1511 = mul(x = x1, y = cos_7)[name = string("op_1511")];
            tensor<fp16, [1, 8, 64, 64]> var_1512 = mul(x = x2, y = sin_7)[name = string("op_1512")];
            tensor<fp16, [1, 8, 64, 64]> var_1513 = sub(x = var_1511, y = var_1512)[name = string("op_1513")];
            tensor<fp16, [1, 8, 64, 64]> var_1514 = mul(x = x2, y = cos_7)[name = string("op_1514")];
            tensor<fp16, [1, 8, 64, 64]> var_1515 = mul(x = x1, y = sin_7)[name = string("op_1515")];
            tensor<fp16, [1, 8, 64, 64]> var_1516 = add(x = var_1514, y = var_1515)[name = string("op_1516")];
            bool rotated_interleave_0 = const()[name = string("rotated_interleave_0"), val = bool(false)];
            tensor<fp16, [1, 8, 64, 128]> rotated = concat(axis = var_46, interleave = rotated_interleave_0, values = (var_1513, var_1516))[name = string("rotated")];
            tensor<int32, [1]> expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor<int32, [1]>([13])];
            tensor<int32, [1]> expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_76 = const()[name = string("expand_dims_76"), val = tensor<int32, [1]>([14])];
            int32 concat_110_axis_0 = const()[name = string("concat_110_axis_0"), val = int32(0)];
            bool concat_110_interleave_0 = const()[name = string("concat_110_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_110 = concat(axis = concat_110_axis_0, interleave = concat_110_interleave_0, values = (expand_dims_72, expand_dims_73, current_pos, expand_dims_75))[name = string("concat_110")];
            tensor<int32, [1]> concat_111_values1_0 = const()[name = string("concat_111_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_111_values3_0 = const()[name = string("concat_111_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_111_axis_0 = const()[name = string("concat_111_axis_0"), val = int32(0)];
            bool concat_111_interleave_0 = const()[name = string("concat_111_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_111 = concat(axis = concat_111_axis_0, interleave = concat_111_interleave_0, values = (expand_dims_76, concat_111_values1_0, var_343, concat_111_values3_0))[name = string("concat_111")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_13_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_110, begin_mask = model_model_kv_cache_0_internal_tensor_assign_13_begin_mask_0, end = concat_111, end_mask = model_model_kv_cache_0_internal_tensor_assign_13_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_13_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_13_stride_0, update = rotated, x = coreml_update_state_25)[name = string("model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_13_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_26_write_state")];
            tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_26 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_26")];
            tensor<int32, [1]> expand_dims_78 = const()[name = string("expand_dims_78"), val = tensor<int32, [1]>([41])];
            tensor<int32, [1]> expand_dims_79 = const()[name = string("expand_dims_79"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_81 = const()[name = string("expand_dims_81"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> expand_dims_82 = const()[name = string("expand_dims_82"), val = tensor<int32, [1]>([42])];
            int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)];
            bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (expand_dims_78, expand_dims_79, current_pos, expand_dims_81))[name = string("concat_114")];
            tensor<int32, [1]> concat_115_values1_0 = const()[name = string("concat_115_values1_0"), val = tensor<int32, [1]>([0])];
            tensor<int32, [1]> concat_115_values3_0 = const()[name = string("concat_115_values3_0"), val = tensor<int32, [1]>([0])];
            int32 concat_115_axis_0 = const()[name = string("concat_115_axis_0"), val = int32(0)];
            bool concat_115_interleave_0 = const()[name = string("concat_115_interleave_0"), val = bool(false)];
            tensor<int32, [4]> concat_115 = concat(axis = concat_115_axis_0, interleave = concat_115_interleave_0, values = (expand_dims_82, concat_115_values1_0, var_343, concat_115_values3_0))[name = string("concat_115")];
            tensor<int32, [4]> model_model_kv_cache_0_internal_tensor_assign_14_stride_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_stride_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0"), val = tensor<bool, [4]>([false, true, false, true])];
            tensor<bool, [4]> model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, false, false])];
            tensor<fp16, [1, 8, 64, 128]> value_states_39 = transpose(perm = var_1472, x = var_1471)[name = string("transpose_3")];
            tensor<fp16, [56, 8, 1024, 128]> model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_114, begin_mask = model_model_kv_cache_0_internal_tensor_assign_14_begin_mask_0, end = concat_115, end_mask = model_model_kv_cache_0_internal_tensor_assign_14_end_mask_0, squeeze_mask = model_model_kv_cache_0_internal_tensor_assign_14_squeeze_mask_0, stride = model_model_kv_cache_0_internal_tensor_assign_14_stride_0, update = value_states_39, x = coreml_update_state_26)[name = string("model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16")];
            write_state(data = model_model_kv_cache_0_internal_tensor_assign_14_cast_fp16, input = model_model_kv_cache_0)[name = string("coreml_update_state_27_write_state")];
            tensor<fp16, [56, 8, 1024, 128]> coreml_update_state_27 = read_state(input = model_model_kv_cache_0)[name = string("coreml_update_state_27")];
            tensor<int32, [4]> var_1539_begin_0 = const()[name = string("op_1539_begin_0"), val = tensor<int32, [4]>([13, 0, 0, 0])];
            tensor<int32, [4]> var_1539_end_0 = const()[name = string("op_1539_end_0"), val = tensor<int32, [4]>([14, 8, 1024, 128])];
            tensor<bool, [4]> var_1539_end_mask_0 = const()[name = string("op_1539_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_1539_cast_fp16 = slice_by_index(begin = var_1539_begin_0, end = var_1539_end_0, end_mask = var_1539_end_mask_0, x = coreml_update_state_27)[name = string("op_1539_cast_fp16")];
            tensor<int32, [1]> K_layer_cache_axes_0 = const()[name = string("K_layer_cache_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> K_layer_cache_cast_fp16 = squeeze(axes = K_layer_cache_axes_0, x = var_1539_cast_fp16)[name = string("K_layer_cache_cast_fp16")];
            tensor<int32, [4]> var_1541_begin_0 = const()[name = string("op_1541_begin_0"), val = tensor<int32, [4]>([41, 0, 0, 0])];
            tensor<int32, [4]> var_1541_end_0 = const()[name = string("op_1541_end_0"), val = tensor<int32, [4]>([42, 8, 1024, 128])];
            tensor<bool, [4]> var_1541_end_mask_0 = const()[name = string("op_1541_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
            tensor<fp16, [1, 8, 1024, 128]> var_1541_cast_fp16 = slice_by_index(begin = var_1541_begin_0, end = var_1541_end_0, end_mask = var_1541_end_mask_0, x = coreml_update_state_27)[name = string("op_1541_cast_fp16")];
            tensor<int32, [1]> V_layer_cache_axes_0 = const()[name = string("V_layer_cache_axes_0"), val = tensor<int32, [1]>([0])];
            tensor<fp16, [8, 1024, 128]> V_layer_cache_cast_fp16 = squeeze(axes = V_layer_cache_axes_0, x = var_1541_cast_fp16)[name = string("V_layer_cache_cast_fp16")];
            tensor<int32, [1]> x_179_axes_0 = const()[name = string("x_179_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_179_cast_fp16 = expand_dims(axes = x_179_axes_0, x = K_layer_cache_cast_fp16)[name = string("x_179_cast_fp16")];
            tensor<int32, [4]> var_1550 = const()[name = string("op_1550"), val = tensor<int32, [4]>([1, 3, 1, 1])];
            tensor<fp16, [8, 3, 1024, 128]> x_181_cast_fp16 = tile(reps = var_1550, x = x_179_cast_fp16)[name = string("x_181_cast_fp16")];
            tensor<int32, [4]> var_1554 = const()[name = string("op_1554"), val = tensor<int32, [4]>([1, -1, 1024, 128])];
            tensor<fp16, [1, 24, 1024, 128]> var_1555_cast_fp16 = reshape(shape = var_1554, x = x_181_cast_fp16)[name = string("op_1555_cast_fp16")];
            tensor<int32, [1]> x_185_axes_0 = const()[name = string("x_185_axes_0"), val = tensor<int32, [1]>([1])];
            tensor<fp16, [8, 1, 1024, 128]> x_185_cast_fp16 = expand_dims(axes = x_185_axes_0, x = V_layer_cache_cast_fp16)[name = string("x_185_cast_fp16")];
            tensor<int32, [4]> var_1557 = const()[name = string("op_1557"), val = tensor<int32, [4]>([1, 3, 1, 1])];
            tensor<fp16, [8, 3, 1024, 128]> x_187_cast_fp16 = tile(reps = var_1557, x = x_185_cast_fp16)[name = string("x_187_cast_fp16")];
            bool var_1564_transpose_x_0 = const()[name = string("op_1564_transpose_x_0"), val = bool(false)];
            bool var_1564_transpose_y_0 = const()[name = string("op_1564_transpose_y_0"), val = bool(true)];
            tensor<fp16, [1, 24, 64, 1024]> var_1564_cast_fp16 = matmul(transpose_x = var_1564_transpose_x_0, transpose_y = var_1564_transpose_y_0, x = rotated_25, y = var_1555_cast_fp16)[name = string("op_1564_cast_fp16")];
            fp16 var_1565_to_fp16 = const()[name = string("op_1565_to_fp16"), val = fp16(0x1.6ap-4)];
            tensor<fp16, [1, 24, 64, 1024]> attn_weights_cast_fp16 = mul(x = var_1564_cast_fp16, y = var_1565_to_fp16)[name = string("attn_weights_cast_fp16")];
            tensor<fp16, [1, 24, 64, 1024]> x_189_cast_fp16 = add(x = attn_weights_cast_fp16, y = causal_mask)[name = string("x_189_cast_fp16")];
            tensor<int32, [1]> reduce_max_6_axes_0 = const()[name = string("reduce_max_6_axes_0"), val = tensor<int32, [1]>([-1])];
            bool reduce_max_6_keep_dims_0 = const()[name = string("reduce_max_6_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 24, 64, 1]> reduce_max_6_cast_fp16 = reduce_max(axes = reduce_max_6_axes_0, keep_dims = reduce_max_6_keep_dims_0, x = x_189_cast_fp16)[name = string("reduce_max_6_cast_fp16")];
            tensor<fp16, [1, 24, 64, 1024]> x_191_cast_fp16 = sub(x = x_189_cast_fp16, y = reduce_max_6_cast_fp16)[name = string("x_191_cast_fp16")];
            tensor<fp16, [1, 24, 64, 1024]> exp_x_cast_fp16 = exp(x = x_191_cast_fp16)[name = string("exp_x_cast_fp16")];
            tensor<int32, [1]> var_1576_axes_0 = const()[name = string("op_1576_axes_0"), val = tensor<int32, [1]>([-1])];
            bool var_1576_keep_dims_0 = const()[name = string("op_1576_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 24, 64, 1]> var_1576_cast_fp16 = reduce_sum(axes = var_1576_axes_0, keep_dims = var_1576_keep_dims_0, x = exp_x_cast_fp16)[name = string("op_1576_cast_fp16")];
            tensor<fp16, [1, 24, 64, 1024]> var_1577_cast_fp16 = real_div(x = exp_x_cast_fp16, y = var_1576_cast_fp16)[name = string("op_1577_cast_fp16")];
            tensor<int32, [3]> concat_120 = const()[name = string("concat_120"), val = tensor<int32, [3]>([24, 64, 1024])];
            tensor<fp16, [24, 64, 1024]> reshape_18_cast_fp16 = reshape(shape = concat_120, x = var_1577_cast_fp16)[name = string("reshape_18_cast_fp16")];
            tensor<int32, [3]> concat_121 = const()[name = string("concat_121"), val = tensor<int32, [3]>([24, 1024, 128])];
            tensor<fp16, [24, 1024, 128]> reshape_19_cast_fp16 = reshape(shape = concat_121, x = x_187_cast_fp16)[name = string("reshape_19_cast_fp16")];
            bool matmul_6_transpose_x_0 = const()[name = string("matmul_6_transpose_x_0"), val = bool(false)];
            bool matmul_6_transpose_y_0 = const()[name = string("matmul_6_transpose_y_0"), val = bool(false)];
            tensor<fp16, [24, 64, 128]> matmul_6_cast_fp16 = matmul(transpose_x = matmul_6_transpose_x_0, transpose_y = matmul_6_transpose_y_0, x = reshape_18_cast_fp16, y = reshape_19_cast_fp16)[name = string("matmul_6_cast_fp16")];
            tensor<int32, [4]> concat_125 = const()[name = string("concat_125"), val = tensor<int32, [4]>([1, 24, 64, 128])];
            tensor<fp16, [1, 24, 64, 128]> reshape_20_cast_fp16 = reshape(shape = concat_125, x = matmul_6_cast_fp16)[name = string("reshape_20_cast_fp16")];
            tensor<int32, [4]> var_1580_perm_0 = const()[name = string("op_1580_perm_0"), val = tensor<int32, [4]>([0, 2, 1, 3])];
            tensor<int32, [3]> var_1582 = const()[name = string("op_1582"), val = tensor<int32, [3]>([1, 64, 3072])];
            tensor<fp16, [1, 64, 24, 128]> var_1580_cast_fp16 = transpose(perm = var_1580_perm_0, x = reshape_20_cast_fp16)[name = string("transpose_2")];
            tensor<fp16, [1, 64, 3072]> input_89_cast_fp16 = reshape(shape = var_1582, x = var_1580_cast_fp16)[name = string("input_89_cast_fp16")];
            tensor<fp16, [3072, 3072]> model_model_layers_13_self_attn_o_proj_weight_promoted_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint8, [3072, 3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(774597696))), lut = tensor<fp16, [384, 1, 256, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(784034944))))[name = string("model_model_layers_13_self_attn_o_proj_weight_promoted_to_fp16_palettized")];
            tensor<fp16, [1, 64, 3072]> linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_13_self_attn_o_proj_weight_promoted_to_fp16_palettized, x = input_89_cast_fp16)[name = string("linear_6_cast_fp16")];
            tensor<fp16, [1, 64, 3072]> hidden_states_53_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = linear_6_cast_fp16)[name = string("hidden_states_53_cast_fp16")];
            tensor<int32, [1]> mean_axes_0 = const()[name = string("mean_axes_0"), val = tensor<int32, [1]>([-1])];
            bool mean_keep_dims_0 = const()[name = string("mean_keep_dims_0"), val = bool(true)];
            tensor<fp16, [1, 64, 1]> mean_cast_fp16 = reduce_mean(axes = mean_axes_0, keep_dims = mean_keep_dims_0, x = hidden_states_53_cast_fp16)[name = string("mean_cast_fp16")];
            tensor<fp16, [1, 64, 3072]> input_91_cast_fp16 = sub(x = hidden_states_53_cast_fp16, y = mean_cast_fp16)[name = string("input_91_cast_fp16")];
            tensor<int32, [1]> var_1593_axes_0 = const()[name = string("op_1593_axes_0"), val = tensor<int32, [1]>([-1])];
            tensor<fp16, [3072]> model_model_layers_13_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_post_attention_layernorm_weight_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(784231616)))];
            tensor<fp16, [1, 64, 3072]> var_1593_cast_fp16 = layer_norm(axes = var_1593_axes_0, epsilon = var_48_to_fp16, gamma = model_model_layers_13_post_attention_layernorm_weight_to_fp16, x = input_91_cast_fp16)[name = string("op_1593_cast_fp16")];
            tensor<int32, [3]> var_1600 = const()[name = string("op_1600"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<int32, [1]> input_93_axes_0 = const()[name = string("input_93_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 64]> var_1601 = transpose(perm = var_1600, x = var_1593_cast_fp16)[name = string("transpose_1")];
            tensor<fp16, [1, 3072, 1, 64]> input_93 = expand_dims(axes = input_93_axes_0, x = var_1601)[name = string("input_93")];
            string input_95_pad_type_0 = const()[name = string("input_95_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> input_95_strides_0 = const()[name = string("input_95_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> input_95_pad_0 = const()[name = string("input_95_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> input_95_dilations_0 = const()[name = string("input_95_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 input_95_groups_0 = const()[name = string("input_95_groups_0"), val = int32(1)];
            tensor<fp16, [1, 8192, 1, 64]> input_95 = conv(dilations = input_95_dilations_0, groups = input_95_groups_0, pad = input_95_pad_0, pad_type = input_95_pad_type_0, strides = input_95_strides_0, weight = model_model_layers_13_mlp_gate_proj_weight_palettized, x = input_93)[name = string("input_95")];
            string up_states_pad_type_0 = const()[name = string("up_states_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> up_states_strides_0 = const()[name = string("up_states_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> up_states_pad_0 = const()[name = string("up_states_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> up_states_dilations_0 = const()[name = string("up_states_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 up_states_groups_0 = const()[name = string("up_states_groups_0"), val = int32(1)];
            tensor<fp16, [1, 8192, 1, 64]> up_states = conv(dilations = up_states_dilations_0, groups = up_states_groups_0, pad = up_states_pad_0, pad_type = up_states_pad_type_0, strides = up_states_strides_0, weight = model_model_layers_13_mlp_up_proj_weight_palettized, x = input_93)[name = string("up_states")];
            tensor<fp16, [1, 8192, 1, 64]> gate_states = silu(x = input_95)[name = string("gate_states")];
            tensor<fp16, [1, 8192, 1, 64]> input = mul(x = gate_states, y = up_states)[name = string("input")];
            string hidden_states_pad_type_0 = const()[name = string("hidden_states_pad_type_0"), val = string("valid")];
            tensor<int32, [2]> hidden_states_strides_0 = const()[name = string("hidden_states_strides_0"), val = tensor<int32, [2]>([1, 1])];
            tensor<int32, [4]> hidden_states_pad_0 = const()[name = string("hidden_states_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
            tensor<int32, [2]> hidden_states_dilations_0 = const()[name = string("hidden_states_dilations_0"), val = tensor<int32, [2]>([1, 1])];
            int32 hidden_states_groups_0 = const()[name = string("hidden_states_groups_0"), val = int32(1)];
            tensor<fp16, [1, 3072, 1, 64]> hidden_states_1 = conv(dilations = hidden_states_dilations_0, groups = hidden_states_groups_0, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = hidden_states_strides_0, weight = model_model_layers_13_mlp_down_proj_weight_palettized, x = input)[name = string("hidden_states")];
            tensor<int32, [1]> var_1623_axes_0 = const()[name = string("op_1623_axes_0"), val = tensor<int32, [1]>([2])];
            tensor<fp16, [1, 3072, 64]> var_1623 = squeeze(axes = var_1623_axes_0, x = hidden_states_1)[name = string("op_1623")];
            tensor<int32, [3]> var_1624 = const()[name = string("op_1624"), val = tensor<int32, [3]>([0, 2, 1])];
            tensor<fp16, [1, 64, 3072]> var_1625 = transpose(perm = var_1624, x = var_1623)[name = string("transpose_0")];
            tensor<fp16, [1, 64, 3072]> output_hidden_states = add(x = hidden_states_53_cast_fp16, y = var_1625)[name = string("op_1626_cast_fp16")];
        } -> (output_hidden_states);
}